xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 003fa74bb2d374584d33bc980c4c08b1e1529044)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118    for communicators controlling multiple processes.  It is recommended that you call both of
119    the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. This type also automatically switches over to use inodes when
128   enough consecutive rows share the same nonzero structure.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
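
/*
  Editorial note: a minimal usage sketch of the recommendation above. The helper name
  CreateAIJExample and the nonzero estimates (5 per diagonal-block row, 2 per
  off-diagonal-block row) are illustrative, not part of PETSc; the #if 0 guard keeps
  the sketch out of the build.
*/
#if 0
static PetscErrorCode CreateAIJExample(MPI_Comm comm, PetscInt m, PetscInt n, Mat *newmat)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, newmat));
  PetscCall(MatSetSizes(*newmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*newmat, MATAIJ)); /* resolves to MATSEQAIJ or MATMPIAIJ from the communicator size */
  PetscCall(MatSeqAIJSetPreallocation(*newmat, 5, NULL));          /* no-op unless the matrix is MATSEQAIJ */
  PetscCall(MatMPIAIJSetPreallocation(*newmat, 5, NULL, 2, NULL)); /* no-op unless the matrix is MATMPIAIJ */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif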
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139    for communicators controlling multiple processes.  It is recommended that you call both of
140    the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
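
/*
  Editorial note: a command-line sketch of the option documented above ("ex1" is a
  placeholder executable); the type can be selected at runtime without source changes:

      mpiexec -n 4 ./ex1 -mat_type aijcrl
*/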
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287 
288   PetscFunctionBegin;
289   PetscCall(MatGetSize(A, &m, &n));
290   PetscCall(PetscCalloc1(n, &work));
291   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
292   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
294   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
295   if (type == NORM_2) {
296     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
297     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
298   } else if (type == NORM_1) {
299     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
300     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
301   } else if (type == NORM_INFINITY) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
304   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
307   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
310   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
311   if (type == NORM_INFINITY) {
312     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
313   } else {
314     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
315   }
316   PetscCall(PetscFree(work));
317   if (type == NORM_2) {
318     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
319   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
320     for (i = 0; i < n; i++) reductions[i] /= m;
321   }
322   PetscFunctionReturn(PETSC_SUCCESS);
323 }
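
/*
  Editorial note on MatGetColumnReductions_MPIAIJ() above: the paired
  MatSeqAIJGetArrayRead()/MatSeqAIJRestoreArrayRead() calls on the unused pointer
  appear to exist only to migrate possibly device-resident values to the host,
  since the loops that follow read a_aij->a and b_aij->a directly.
*/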
324 
325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
326 {
327   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
328   IS              sis, gis;
329   const PetscInt *isis, *igis;
330   PetscInt        n, *iis, nsis, ngis, rstart, i;
331 
332   PetscFunctionBegin;
333   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
334   PetscCall(MatFindNonzeroRows(a->B, &gis));
335   PetscCall(ISGetSize(gis, &ngis));
336   PetscCall(ISGetSize(sis, &nsis));
337   PetscCall(ISGetIndices(sis, &isis));
338   PetscCall(ISGetIndices(gis, &igis));
339 
340   PetscCall(PetscMalloc1(ngis + nsis, &iis));
341   PetscCall(PetscArraycpy(iis, igis, ngis));
342   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
343   n = ngis + nsis;
344   PetscCall(PetscSortRemoveDupsInt(&n, iis));
345   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
346   for (i = 0; i < n; i++) iis[i] += rstart;
347   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
348 
349   PetscCall(ISRestoreIndices(sis, &isis));
350   PetscCall(ISRestoreIndices(gis, &igis));
351   PetscCall(ISDestroy(&sis));
352   PetscCall(ISDestroy(&gis));
353   PetscFunctionReturn(PETSC_SUCCESS);
354 }
355 
356 /*
357   Local utility routine that creates a mapping from the global column
358 number to the local number in the off-diagonal part of the local
359 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
360 a slightly higher hash table cost; without it, it is not scalable (each process
361 holds an order-N integer array) but is fast to access.
362 */
363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
364 {
365   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
366   PetscInt    n   = aij->B->cmap->n, i;
367 
368   PetscFunctionBegin;
369   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
370 #if defined(PETSC_USE_CTABLE)
371   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
372   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
373 #else
374   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
375   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
376 #endif
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
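
/*
  Editorial note: a sketch of how the colmap built above is consulted (compare
  MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below). Entries are stored shifted
  by +1 so that 0 (the hash-map default, or an untouched slot of the array) means
  "global column gcol is not a column of B":

    PetscInt lcol;
  #if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
    lcol--;                        (lcol < 0: gcol not present in B)
  #else
    lcol = aij->colmap[gcol] - 1;
  #endif
*/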
379 
380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
381   do { \
382     if (col <= lastcol1) low1 = 0; \
383     else high1 = nrow1; \
384     lastcol1 = col; \
385     while (high1 - low1 > 5) { \
386       t = (low1 + high1) / 2; \
387       if (rp1[t] > col) high1 = t; \
388       else low1 = t; \
389     } \
390     for (_i = low1; _i < high1; _i++) { \
391       if (rp1[_i] > col) break; \
392       if (rp1[_i] == col) { \
393         if (addv == ADD_VALUES) { \
394           ap1[_i] += value; \
395           /* unclear whether this PetscLogFlops() call noticeably slows down the code */ \
396           (void)PetscLogFlops(1.0); \
397         } else ap1[_i] = value; \
398         goto a_noinsert; \
399       } \
400     } \
401     if (value == 0.0 && ignorezeroentries && row != col) { \
402       low1  = 0; \
403       high1 = nrow1; \
404       goto a_noinsert; \
405     } \
406     if (nonew == 1) { \
407       low1  = 0; \
408       high1 = nrow1; \
409       goto a_noinsert; \
410     } \
411     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
412     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
413     N = nrow1++ - 1; \
414     a->nz++; \
415     high1++; \
416     /* shift up all the later entries in this row */ \
417     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
418     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
419     rp1[_i] = col; \
420     ap1[_i] = value; \
421   a_noinsert:; \
422     ailen[row] = nrow1; \
423   } while (0)
424 
425 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
426   do { \
427     if (col <= lastcol2) low2 = 0; \
428     else high2 = nrow2; \
429     lastcol2 = col; \
430     while (high2 - low2 > 5) { \
431       t = (low2 + high2) / 2; \
432       if (rp2[t] > col) high2 = t; \
433       else low2 = t; \
434     } \
435     for (_i = low2; _i < high2; _i++) { \
436       if (rp2[_i] > col) break; \
437       if (rp2[_i] == col) { \
438         if (addv == ADD_VALUES) { \
439           ap2[_i] += value; \
440           (void)PetscLogFlops(1.0); \
441         } else ap2[_i] = value; \
442         goto b_noinsert; \
443       } \
444     } \
445     if (value == 0.0 && ignorezeroentries) { \
446       low2  = 0; \
447       high2 = nrow2; \
448       goto b_noinsert; \
449     } \
450     if (nonew == 1) { \
451       low2  = 0; \
452       high2 = nrow2; \
453       goto b_noinsert; \
454     } \
455     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
456     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
457     N = nrow2++ - 1; \
458     b->nz++; \
459     high2++; \
460     /* shift up all the later entries in this row */ \
461     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
462     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
463     rp2[_i] = col; \
464     ap2[_i] = value; \
465   b_noinsert:; \
466     bilen[row] = nrow2; \
467   } while (0)
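
/*
  Editorial note on the two macros above: both maintain a [low,high) search bracket
  within the row, narrow it by bisection while it holds more than five entries, and
  then scan linearly. On a miss they either drop the value (a zero while
  ignorezeroentries is set, where the A variant additionally exempts the diagonal
  entry, or nonew == 1), raise an error (nonew == -1), or grow the row with
  MatSeqXAIJReallocateAIJ() and shift the later entries up one slot to make room.
*/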
468 
469 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
470 {
471   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
472   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
473   PetscInt     l, *garray                         = mat->garray, diag;
474   PetscScalar *aa, *ba;
475 
476   PetscFunctionBegin;
477   /* code only works for square matrices A */
478 
479   /* find size of row to the left of the diagonal part */
480   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
481   row = row - diag;
482   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
483     if (garray[b->j[b->i[row] + l]] > diag) break;
484   }
485   if (l) {
486     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
487     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
488     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
489   }
490 
491   /* diagonal part */
492   if (a->i[row + 1] - a->i[row]) {
493     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
494     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
495     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
496   }
497 
498   /* right of diagonal part */
499   if (b->i[row + 1] - b->i[row] - l) {
500     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
501     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
502     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
503   }
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
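
/*
  Editorial example of the v[] layout consumed by MatSetValuesRow_MPIAIJ() above
  (square matrix; one value per stored slot of the row, ordered by global column).
  With ownership range [10,15) and stored global columns {2, 7, 10, 12, 20}:
    v[0..1] -> B entries left of the diagonal block (columns 2 and 7)
    v[2..3] -> A (diagonal-block) entries           (columns 10 and 12)
    v[4]    -> remaining B entries                  (column 20)
*/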
506 
507 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
508 {
509   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
510   PetscScalar value = 0.0;
511   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
512   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
513   PetscBool   roworiented = aij->roworiented;
514 
515   /* Some Variables required in the macro */
516   Mat         A     = aij->A;
517   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
518   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
519   PetscBool   ignorezeroentries = a->ignorezeroentries;
520   Mat         B                 = aij->B;
521   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
522   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
523   MatScalar  *aa, *ba;
524   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
525   PetscInt    nonew;
526   MatScalar  *ap1, *ap2;
527 
528   PetscFunctionBegin;
529   PetscCall(MatSeqAIJGetArray(A, &aa));
530   PetscCall(MatSeqAIJGetArray(B, &ba));
531   for (i = 0; i < m; i++) {
532     if (im[i] < 0) continue;
533     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
534     if (im[i] >= rstart && im[i] < rend) {
535       row      = im[i] - rstart;
536       lastcol1 = -1;
537       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
538       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
539       rmax1    = aimax[row];
540       nrow1    = ailen[row];
541       low1     = 0;
542       high1    = nrow1;
543       lastcol2 = -1;
544       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
545       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
546       rmax2    = bimax[row];
547       nrow2    = bilen[row];
548       low2     = 0;
549       high2    = nrow2;
550 
551       for (j = 0; j < n; j++) {
552         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
553         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
554         if (in[j] >= cstart && in[j] < cend) {
555           col   = in[j] - cstart;
556           nonew = a->nonew;
557           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
558         } else if (in[j] < 0) {
559           continue;
560         } else {
561           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
562           if (mat->was_assembled) {
563             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
564 #if defined(PETSC_USE_CTABLE)
565             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
566             col--;
567 #else
568             col = aij->colmap[in[j]] - 1;
569 #endif
570             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
571               PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
572               col = in[j];
573               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
574               B     = aij->B;
575               b     = (Mat_SeqAIJ *)B->data;
576               bimax = b->imax;
577               bi    = b->i;
578               bilen = b->ilen;
579               bj    = b->j;
580               ba    = b->a;
581               rp2   = bj + bi[row];
582               ap2   = ba + bi[row];
583               rmax2 = bimax[row];
584               nrow2 = bilen[row];
585               low2  = 0;
586               high2 = nrow2;
587               bm    = aij->B->rmap->n;
588               ba    = b->a;
589             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
590               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
591                 PetscCall(PetscInfo(mat, "Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
592               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
593             }
594           } else col = in[j];
595           nonew = b->nonew;
596           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
597         }
598       }
599     } else {
600       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
601       if (!aij->donotstash) {
602         mat->assembled = PETSC_FALSE;
603         if (roworiented) {
604           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
605         } else {
606           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         }
608       }
609     }
610   }
611   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
612   PetscCall(MatSeqAIJRestoreArray(B, &ba));
613   PetscFunctionReturn(PETSC_SUCCESS);
614 }
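
/*
  Editorial sketch of the caller-side contract handled by MatSetValues_MPIAIJ() above.
  With roworiented (the default), v is a dense m x n block in row-major order; negative
  row/column indices are skipped, and rows owned by other processes are stashed for
  communication at assembly time unless MAT_NO_OFF_PROC_ENTRIES was set. Inserting a
  2 x 2 block (r0, r1, c0, c1 and the a* values are hypothetical):

    PetscInt    rows[2] = {r0, r1}, cols[2] = {c0, c1};
    PetscScalar vals[4] = {a00, a01, a10, a11};  (vals[i*2+j] targets (rows[i], cols[j]))
    PetscCall(MatSetValues(mat, 2, rows, 2, cols, vals, ADD_VALUES));
*/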
615 
616 /*
617     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
618     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
619     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
620 */
621 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
622 {
623   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
624   Mat         A      = aij->A; /* diagonal part of the matrix */
625   Mat         B      = aij->B; /* off-diagonal part of the matrix */
626   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
627   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
628   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
629   PetscInt   *ailen = a->ilen, *aj = a->j;
630   PetscInt   *bilen = b->ilen, *bj = b->j;
631   PetscInt    am          = aij->A->rmap->n, j;
632   PetscInt    diag_so_far = 0, dnz;
633   PetscInt    offd_so_far = 0, onz;
634 
635   PetscFunctionBegin;
636   /* Iterate over all rows of the matrix */
637   for (j = 0; j < am; j++) {
638     dnz = onz = 0;
639     /*  Iterate over all non-zero columns of the current row */
640     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
641       /* If column is in the diagonal */
642       if (mat_j[col] >= cstart && mat_j[col] < cend) {
643         aj[diag_so_far++] = mat_j[col] - cstart;
644         dnz++;
645       } else { /* off-diagonal entries */
646         bj[offd_so_far++] = mat_j[col];
647         onz++;
648       }
649     }
650     ailen[j] = dnz;
651     bilen[j] = onz;
652   }
653   PetscFunctionReturn(PETSC_SUCCESS);
654 }
655 
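/*
  Editorial example for MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic() above: with
  owned columns [cstart,cend) = [4,8), a row with sorted column indices {1, 5, 9} is
  split into the diagonal block (5, stored as local column 5 - 4 = 1) and the
  off-diagonal block (1 and 9, kept as global indices until assembly).
*/
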
656 /*
657     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
658     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
659     No off-processor parts of the matrix are allowed here; they are set later by MatSetValues_MPIAIJ().
660     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
661     would not hold and the more complex MatSetValues_MPIAIJ() would have to be used.
662 */
663 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
664 {
665   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
666   Mat          A    = aij->A; /* diagonal part of the matrix */
667   Mat          B    = aij->B; /* off-diagonal part of the matrix */
668   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
669   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
670   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
671   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
672   PetscInt    *ailen = a->ilen, *aj = a->j;
673   PetscInt    *bilen = b->ilen, *bj = b->j;
674   PetscInt     am          = aij->A->rmap->n, j;
675   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
676   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
677   PetscScalar *aa = a->a, *ba = b->a;
678 
679   PetscFunctionBegin;
680   /* Iterate over all rows of the matrix */
681   for (j = 0; j < am; j++) {
682     dnz_row = onz_row = 0;
683     rowstart_offd     = full_offd_i[j];
684     rowstart_diag     = full_diag_i[j];
685     /*  Iterate over all non-zero columns of the current row */
686     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
687       /* If column is in the diagonal */
688       if (mat_j[col] >= cstart && mat_j[col] < cend) {
689         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
690         aa[rowstart_diag + dnz_row] = mat_a[col];
691         dnz_row++;
692       } else { /* off-diagonal entries */
693         bj[rowstart_offd + onz_row] = mat_j[col];
694         ba[rowstart_offd + onz_row] = mat_a[col];
695         onz_row++;
696       }
697     }
698     ailen[j] = dnz_row;
699     bilen[j] = onz_row;
700   }
701   PetscFunctionReturn(PETSC_SUCCESS);
702 }
703 
704 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
705 {
706   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
707   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
708   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
709 
710   PetscFunctionBegin;
711   for (i = 0; i < m; i++) {
712     if (idxm[i] < 0) continue; /* negative row */
713     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
714     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
715     row = idxm[i] - rstart;
716     for (j = 0; j < n; j++) {
717       if (idxn[j] < 0) continue; /* negative column */
718       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
719       if (idxn[j] >= cstart && idxn[j] < cend) {
720         col = idxn[j] - cstart;
721         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
722       } else {
723         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
724 #if defined(PETSC_USE_CTABLE)
725         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
726         col--;
727 #else
728         col = aij->colmap[idxn[j]] - 1;
729 #endif
730         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
731         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
732       }
733     }
734   }
735   PetscFunctionReturn(PETSC_SUCCESS);
736 }
737 
738 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
739 {
740   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
741   PetscInt    nstash, reallocs;
742 
743   PetscFunctionBegin;
744   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
745 
746   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
747   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
748   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
749   PetscFunctionReturn(PETSC_SUCCESS);
750 }
751 
752 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
753 {
754   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
755   PetscMPIInt  n;
756   PetscInt     i, j, rstart, ncols, flg;
757   PetscInt    *row, *col;
758   PetscBool    other_disassembled;
759   PetscScalar *val;
760 
761   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
762 
763   PetscFunctionBegin;
764   if (!aij->donotstash && !mat->nooffprocentries) {
765     while (1) {
766       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
767       if (!flg) break;
768 
769       for (i = 0; i < n;) {
770         /* Now identify the consecutive vals belonging to the same row */
771         for (j = i, rstart = row[j]; j < n; j++) {
772           if (row[j] != rstart) break;
773         }
774         if (j < n) ncols = j - i;
775         else ncols = n - i;
776         /* Now assemble all these values with a single function call */
777         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
778         i = j;
779       }
780     }
781     PetscCall(MatStashScatterEnd_Private(&mat->stash));
782   }
783 #if defined(PETSC_HAVE_DEVICE)
784   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
785   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
786   if (mat->boundtocpu) {
787     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
788     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
789   }
790 #endif
791   PetscCall(MatAssemblyBegin(aij->A, mode));
792   PetscCall(MatAssemblyEnd(aij->A, mode));
793 
794   /* determine if any processor has disassembled; if so, we must
795      also disassemble ourselves, in order that we may reassemble. */
796   /*
797      if the nonzero structure of the submatrix B cannot change, then we know that
798      no processor disassembled, and thus we can skip this step
799   */
800   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
801     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
802     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
803       PetscCall(MatDisAssemble_MPIAIJ(mat));
804     }
805   }
806   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
807   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
808 #if defined(PETSC_HAVE_DEVICE)
809   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
810 #endif
811   PetscCall(MatAssemblyBegin(aij->B, mode));
812   PetscCall(MatAssemblyEnd(aij->B, mode));
813 
814   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
815 
816   aij->rowvalues = NULL;
817 
818   PetscCall(VecDestroy(&aij->diag));
819 
820   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
821   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
822     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
823     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
824   }
825 #if defined(PETSC_HAVE_DEVICE)
826   mat->offloadmask = PETSC_OFFLOAD_BOTH;
827 #endif
828   PetscFunctionReturn(PETSC_SUCCESS);
829 }
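
/*
  Editorial summary of MatAssemblyEnd_MPIAIJ() above: drain the stash of off-process
  entries through MatSetValues_MPIAIJ(), assemble the diagonal block A, collectively
  decide whether any rank disassembled its off-diagonal block B (and disassemble to
  match if so), set up the scatter for matrix-vector products on the first final
  assembly, assemble B, and finally reduce a global nonzero state so that all ranks
  agree on whether the nonzero pattern changed.
*/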
830 
831 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
832 {
833   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
834 
835   PetscFunctionBegin;
836   PetscCall(MatZeroEntries(l->A));
837   PetscCall(MatZeroEntries(l->B));
838   PetscFunctionReturn(PETSC_SUCCESS);
839 }
840 
841 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
842 {
843   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
844   PetscInt   *lrows;
845   PetscInt    r, len;
846   PetscBool   cong;
847 
848   PetscFunctionBegin;
849   /* get locally owned rows */
850   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
851   PetscCall(MatHasCongruentLayouts(A, &cong));
852   /* fix right-hand side if needed */
853   if (x && b) {
854     const PetscScalar *xx;
855     PetscScalar       *bb;
856 
857     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
858     PetscCall(VecGetArrayRead(x, &xx));
859     PetscCall(VecGetArray(b, &bb));
860     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
861     PetscCall(VecRestoreArrayRead(x, &xx));
862     PetscCall(VecRestoreArray(b, &bb));
863   }
864 
865   if (diag != 0.0 && cong) {
866     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
867     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
868   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow new insertions */
869     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
870     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
871     PetscInt    nnwA, nnwB;
872     PetscBool   nnzA, nnzB;
873 
874     nnwA = aijA->nonew;
875     nnwB = aijB->nonew;
876     nnzA = aijA->keepnonzeropattern;
877     nnzB = aijB->keepnonzeropattern;
878     if (!nnzA) {
879       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
880       aijA->nonew = 0;
881     }
882     if (!nnzB) {
883       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
884       aijB->nonew = 0;
885     }
886     /* Must zero here before the next loop */
887     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
888     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
889     for (r = 0; r < len; ++r) {
890       const PetscInt row = lrows[r] + A->rmap->rstart;
891       if (row >= A->cmap->N) continue;
892       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
893     }
894     aijA->nonew = nnwA;
895     aijB->nonew = nnwB;
896   } else {
897     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
898     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
899   }
900   PetscCall(PetscFree(lrows));
901   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
902   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
903 
904   /* only change matrix nonzero state if pattern was allowed to be changed */
905   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
906     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
907     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
908   }
909   PetscFunctionReturn(PETSC_SUCCESS);
910 }
911 
912 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
913 {
914   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
915   PetscMPIInt        n = A->rmap->n;
916   PetscInt           i, j, r, m, len = 0;
917   PetscInt          *lrows, *owners = A->rmap->range;
918   PetscMPIInt        p = 0;
919   PetscSFNode       *rrows;
920   PetscSF            sf;
921   const PetscScalar *xx;
922   PetscScalar       *bb, *mask, *aij_a;
923   Vec                xmask, lmask;
924   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
925   const PetscInt    *aj, *ii, *ridx;
926   PetscScalar       *aa;
927 
928   PetscFunctionBegin;
929   /* Create SF where leaves are input rows and roots are owned rows */
930   PetscCall(PetscMalloc1(n, &lrows));
931   for (r = 0; r < n; ++r) lrows[r] = -1;
932   PetscCall(PetscMalloc1(N, &rrows));
933   for (r = 0; r < N; ++r) {
934     const PetscInt idx = rows[r];
935     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
936     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
937       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
938     }
939     rrows[r].rank  = p;
940     rrows[r].index = rows[r] - owners[p];
941   }
942   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
943   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
944   /* Collect flags for rows to be zeroed */
945   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
946   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
947   PetscCall(PetscSFDestroy(&sf));
948   /* Compress and put in row numbers */
949   for (r = 0; r < n; ++r)
950     if (lrows[r] >= 0) lrows[len++] = r;
951   /* zero diagonal part of matrix */
952   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
953   /* handle off-diagonal part of matrix */
954   PetscCall(MatCreateVecs(A, &xmask, NULL));
955   PetscCall(VecDuplicate(l->lvec, &lmask));
956   PetscCall(VecGetArray(xmask, &bb));
957   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
958   PetscCall(VecRestoreArray(xmask, &bb));
959   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
960   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
961   PetscCall(VecDestroy(&xmask));
962   if (x && b) { /* this code is buggy when the row and column layout don't match */
963     PetscBool cong;
964 
965     PetscCall(MatHasCongruentLayouts(A, &cong));
966     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
967     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
968     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
969     PetscCall(VecGetArrayRead(l->lvec, &xx));
970     PetscCall(VecGetArray(b, &bb));
971   }
972   PetscCall(VecGetArray(lmask, &mask));
973   /* remove zeroed rows of off-diagonal matrix */
974   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
975   ii = aij->i;
976   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
977   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
978   if (aij->compressedrow.use) {
979     m    = aij->compressedrow.nrows;
980     ii   = aij->compressedrow.i;
981     ridx = aij->compressedrow.rindex;
982     for (i = 0; i < m; i++) {
983       n  = ii[i + 1] - ii[i];
984       aj = aij->j + ii[i];
985       aa = aij_a + ii[i];
986 
987       for (j = 0; j < n; j++) {
988         if (PetscAbsScalar(mask[*aj])) {
989           if (b) bb[*ridx] -= *aa * xx[*aj];
990           *aa = 0.0;
991         }
992         aa++;
993         aj++;
994       }
995       ridx++;
996     }
997   } else { /* do not use compressed row format */
998     m = l->B->rmap->n;
999     for (i = 0; i < m; i++) {
1000       n  = ii[i + 1] - ii[i];
1001       aj = aij->j + ii[i];
1002       aa = aij_a + ii[i];
1003       for (j = 0; j < n; j++) {
1004         if (PetscAbsScalar(mask[*aj])) {
1005           if (b) bb[i] -= *aa * xx[*aj];
1006           *aa = 0.0;
1007         }
1008         aa++;
1009         aj++;
1010       }
1011     }
1012   }
1013   if (x && b) {
1014     PetscCall(VecRestoreArray(b, &bb));
1015     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1016   }
1017   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1018   PetscCall(VecRestoreArray(lmask, &mask));
1019   PetscCall(VecDestroy(&lmask));
1020   PetscCall(PetscFree(lrows));
1021 
1022   /* only change matrix nonzero state if pattern was allowed to be changed */
1023   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1024     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1025     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1026   }
1027   PetscFunctionReturn(PETSC_SUCCESS);
1028 }
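
/*
  Editorial note on MatZeroRowsColumns_MPIAIJ() above: requested rows are routed to
  their owning ranks with a PetscSF reduction, the diagonal block is delegated to
  MatZeroRowsColumns() on l->A, and the zeroed columns of the off-diagonal block are
  located by scattering a 0/1 mask of the zeroed rows into the ghost layout (lmask);
  when x and b are supplied, b is corrected for the eliminated off-diagonal entries.
*/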
1029 
1030 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1031 {
1032   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1033   PetscInt    nt;
1034   VecScatter  Mvctx = a->Mvctx;
1035 
1036   PetscFunctionBegin;
1037   PetscCall(VecGetLocalSize(xx, &nt));
1038   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1039   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1040   PetscUseTypeMethod(a->A, mult, xx, yy);
1041   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1043   PetscFunctionReturn(PETSC_SUCCESS);
1044 }
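
/*
  Editorial note on MatMult_MPIAIJ() above: the ghost-value scatter is started first,
  the purely local product y = A_d x is computed so communication overlaps with
  computation, and only then is the scatter finished and the off-diagonal
  contribution y += B_o x_ghost added.
*/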
1045 
1046 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1047 {
1048   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1049 
1050   PetscFunctionBegin;
1051   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1052   PetscFunctionReturn(PETSC_SUCCESS);
1053 }
1054 
1055 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1056 {
1057   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1058   VecScatter  Mvctx = a->Mvctx;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1075   /* do local part */
1076   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1077   /* add partial results together */
1078   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1079   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1080   PetscFunctionReturn(PETSC_SUCCESS);
1081 }
1082 
1083 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1084 {
1085   MPI_Comm    comm;
1086   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1087   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1088   IS          Me, Notme;
1089   PetscInt    M, N, first, last, *notme, i;
1090   PetscBool   lf;
1091   PetscMPIInt size;
1092 
1093   PetscFunctionBegin;
1094   /* Easy test: symmetric diagonal block */
1095   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1096   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1097   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1098   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1099   PetscCallMPI(MPI_Comm_size(comm, &size));
1100   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1101 
1102   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1103   PetscCall(MatGetSize(Amat, &M, &N));
1104   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1105   PetscCall(PetscMalloc1(N - last + first, &notme));
1106   for (i = 0; i < first; i++) notme[i] = i;
1107   for (i = last; i < M; i++) notme[i - last + first] = i;
1108   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1109   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1110   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1111   Aoff = Aoffs[0];
1112   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1113   Boff = Boffs[0];
1114   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1115   PetscCall(MatDestroyMatrices(1, &Aoffs));
1116   PetscCall(MatDestroyMatrices(1, &Boffs));
1117   PetscCall(ISDestroy(&Me));
1118   PetscCall(ISDestroy(&Notme));
1119   PetscCall(PetscFree(notme));
1120   PetscFunctionReturn(PETSC_SUCCESS);
1121 }
1122 
1123 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1124 {
1125   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1126 
1127   PetscFunctionBegin;
1128   /* do nondiagonal part */
1129   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1130   /* do local part */
1131   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1132   /* add partial results together */
1133   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1134   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1135   PetscFunctionReturn(PETSC_SUCCESS);
1136 }
1137 
1138 /*
1139   This only works correctly for square matrices where the subblock A->A is the
1140    diagonal block
1141 */
1142 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1148   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1149   PetscCall(MatGetDiagonal(a->A, v));
1150   PetscFunctionReturn(PETSC_SUCCESS);
1151 }
1152 
1153 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1154 {
1155   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1156 
1157   PetscFunctionBegin;
1158   PetscCall(MatScale(a->A, aa));
1159   PetscCall(MatScale(a->B, aa));
1160   PetscFunctionReturn(PETSC_SUCCESS);
1161 }
1162 
1163 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1164 {
1165   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1166   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1167   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1168   const PetscInt    *garray = aij->garray;
1169   const PetscScalar *aa, *ba;
1170   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1171   PetscInt64         nz, hnz;
1172   PetscInt          *rowlens;
1173   PetscInt          *colidxs;
1174   PetscScalar       *matvals;
1175   PetscMPIInt        rank;
1176 
1177   PetscFunctionBegin;
1178   PetscCall(PetscViewerSetUp(viewer));
1179 
1180   M  = mat->rmap->N;
1181   N  = mat->cmap->N;
1182   m  = mat->rmap->n;
1183   rs = mat->rmap->rstart;
1184   cs = mat->cmap->rstart;
1185   nz = A->nz + B->nz;
1186 
1187   /* write matrix header */
1188   header[0] = MAT_FILE_CLASSID;
1189   header[1] = M;
1190   header[2] = N;
1191   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1192   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1193   if (rank == 0) {
1194     if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT;
1195     else header[3] = (PetscInt)hnz;
1196   }
1197   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1198 
1199   /* fill in and store row lengths  */
1200   PetscCall(PetscMalloc1(m, &rowlens));
1201   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1202   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1203   PetscCall(PetscFree(rowlens));
1204 
1205   /* fill in and store column indices */
1206   PetscCall(PetscMalloc1(nz, &colidxs));
1207   for (cnt = 0, i = 0; i < m; i++) {
1208     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1209       if (garray[B->j[jb]] > cs) break;
1210       colidxs[cnt++] = garray[B->j[jb]];
1211     }
1212     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1213     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1214   }
1215   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1216   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1217   PetscCall(PetscFree(colidxs));
1218 
1219   /* fill in and store nonzero values */
1220   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1221   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1222   PetscCall(PetscMalloc1(nz, &matvals));
1223   for (cnt = 0, i = 0; i < m; i++) {
1224     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1225       if (garray[B->j[jb]] > cs) break;
1226       matvals[cnt++] = ba[jb];
1227     }
1228     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1229     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1230   }
1231   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1232   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1233   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1235   PetscCall(PetscFree(matvals));
1236 
1237   /* write block size option to the viewer's .info file */
1238   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1239   PetscFunctionReturn(PETSC_SUCCESS);
1240 }
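
/*
  Editorial summary of the binary format written above:
    header  : {MAT_FILE_CLASSID, M, N, global nonzero count (capped at PETSC_MAX_INT)}
    rowlens : one length per row, gathered in global row order
    colidxs : per-process nonzero column indices, each row sorted ascending
              (off-diagonal entries left of the diagonal block, then the diagonal
              block, then the remaining off-diagonal entries)
    matvals : the nonzero values in the same order
  Block-size metadata additionally goes to the viewer's .info file.
*/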
1241 
1242 #include <petscdraw.h>
1243 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1244 {
1245   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1246   PetscMPIInt       rank = aij->rank, size = aij->size;
1247   PetscBool         isdraw, iascii, isbinary;
1248   PetscViewer       sviewer;
1249   PetscViewerFormat format;
1250 
1251   PetscFunctionBegin;
1252   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1253   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1255   if (iascii) {
1256     PetscCall(PetscViewerGetFormat(viewer, &format));
1257     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1258       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1259       PetscCall(PetscMalloc1(size, &nz));
1260       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1261       for (i = 0; i < (PetscInt)size; i++) {
1262         nmax = PetscMax(nmax, nz[i]);
1263         nmin = PetscMin(nmin, nz[i]);
1264         navg += nz[i];
1265       }
1266       PetscCall(PetscFree(nz));
1267       navg = navg / size;
1268       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1269       PetscFunctionReturn(PETSC_SUCCESS);
1270     }
1272     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1273       MatInfo   info;
1274       PetscInt *inodes = NULL;
1275 
1276       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1277       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1278       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1279       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1280       if (!inodes) {
1281         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1282                                                      (double)info.memory));
1283       } else {
1284         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1285                                                      (double)info.memory));
1286       }
1287       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1288       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1289       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(PetscViewerFlush(viewer));
1292       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1293       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1294       PetscCall(VecScatterView(aij->Mvctx, viewer));
1295       PetscFunctionReturn(PETSC_SUCCESS);
1296     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1297       PetscInt inodecount, inodelimit, *inodes;
1298       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1299       if (inodes) {
1300         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1301       } else {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1303       }
1304       PetscFunctionReturn(PETSC_SUCCESS);
1305     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     }
1308   } else if (isbinary) {
1309     if (size == 1) {
1310       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1311       PetscCall(MatView(aij->A, viewer));
1312     } else {
1313       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1314     }
1315     PetscFunctionReturn(PETSC_SUCCESS);
1316   } else if (iascii && size == 1) {
1317     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1318     PetscCall(MatView(aij->A, viewer));
1319     PetscFunctionReturn(PETSC_SUCCESS);
1320   } else if (isdraw) {
1321     PetscDraw draw;
1322     PetscBool isnull;
1323     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1324     PetscCall(PetscDrawIsNull(draw, &isnull));
1325     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1326   }
1327 
1328   { /* assemble the entire matrix onto first processor */
1329     Mat A = NULL, Av;
1330     IS  isrow, iscol;
1331 
1332     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1333     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1334     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1335     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1336     /* The commented-out code below is an alternative implementation using MatCreateSubMatrices() */
1337     /*
1338     Mat *AA, A = NULL, Av;
1339     IS  isrow,iscol;
1340 
1341     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1342     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1343     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1344     if (rank == 0) {
1345        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1346        A    = AA[0];
1347        Av   = AA[0];
1348     }
1349     PetscCall(MatDestroySubMatrices(1,&AA));
1350 */
1351     PetscCall(ISDestroy(&iscol));
1352     PetscCall(ISDestroy(&isrow));
1353     /*
1354        Everyone has to call to draw the matrix since the graphics waits are
1355        synchronized across all processors that share the PetscDraw object
1356     */
1357     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1358     if (rank == 0) {
1359       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1360       PetscCall(MatView_SeqAIJ(Av, sviewer));
1361     }
1362     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1363     PetscCall(MatDestroy(&A));
1364   }
1365   PetscFunctionReturn(PETSC_SUCCESS);
1366 }
1367 
1368 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1369 {
1370   PetscBool iascii, isdraw, issocket, isbinary;
1371 
1372   PetscFunctionBegin;
1373   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1374   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1377   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1378   PetscFunctionReturn(PETSC_SUCCESS);
1379 }
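
/* Usage sketch (hypothetical file name): store an assembled MATMPIAIJ in PETSc binary
   format; on more than one rank this dispatches to MatView_MPIAIJ_Binary() above.

     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "A.dat", FILE_MODE_WRITE, &viewer));
     PetscCall(MatView(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/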
1380 
1381 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1382 {
1383   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1384   Vec         bb1 = NULL;
1385   PetscBool   hasop;
1386 
1387   PetscFunctionBegin;
1388   if (flag == SOR_APPLY_UPPER) {
1389     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1390     PetscFunctionReturn(PETSC_SUCCESS);
1391   }
1392 
1393   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1394 
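  /* The sweeps below use the block splitting stored in Mat_MPIAIJ: the local diagonal block
     A and the off-diagonal block B acting on ghost values of x. Each outer iteration scatters
     x into lvec, forms bb1 = bb - B*lvec, and runs the requested local sweep on A alone. */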
1395   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1396     if (flag & SOR_ZERO_INITIAL_GUESS) {
1397       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1398       its--;
1399     }
1400 
1401     while (its--) {
1402       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1403       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1404 
1405       /* update rhs: bb1 = bb - B*x */
1406       PetscCall(VecScale(mat->lvec, -1.0));
1407       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1408 
1409       /* local sweep */
1410       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1411     }
1412   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1413     if (flag & SOR_ZERO_INITIAL_GUESS) {
1414       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1415       its--;
1416     }
1417     while (its--) {
1418       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1419       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1420 
1421       /* update rhs: bb1 = bb - B*x */
1422       PetscCall(VecScale(mat->lvec, -1.0));
1423       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1424 
1425       /* local sweep */
1426       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1427     }
1428   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1429     if (flag & SOR_ZERO_INITIAL_GUESS) {
1430       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1431       its--;
1432     }
1433     while (its--) {
1434       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1435       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1436 
1437       /* update rhs: bb1 = bb - B*x */
1438       PetscCall(VecScale(mat->lvec, -1.0));
1439       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1440 
1441       /* local sweep */
1442       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1443     }
1444   } else if (flag & SOR_EISENSTAT) {
1445     Vec xx1;
1446 
1447     PetscCall(VecDuplicate(bb, &xx1));
1448     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1449 
1450     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1451     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1452     if (!mat->diag) {
1453       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1454       PetscCall(MatGetDiagonal(matin, mat->diag));
1455     }
1456     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1457     if (hasop) {
1458       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1459     } else {
1460       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1461     }
1462     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1463 
1464     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1465 
1466     /* local sweep */
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1468     PetscCall(VecAXPY(xx, 1.0, xx1));
1469     PetscCall(VecDestroy(&xx1));
1470   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1471 
1472   PetscCall(VecDestroy(&bb1));
1473 
1474   matin->factorerrortype = mat->A->factorerrortype;
1475   PetscFunctionReturn(PETSC_SUCCESS);
1476 }
1477 
1478 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1479 {
1480   Mat             aA, aB, Aperm;
1481   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1482   PetscScalar    *aa, *ba;
1483   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1484   PetscSF         rowsf, sf;
1485   IS              parcolp = NULL;
1486   PetscBool       done;
1487 
1488   PetscFunctionBegin;
1489   PetscCall(MatGetLocalSize(A, &m, &n));
1490   PetscCall(ISGetIndices(rowp, &rwant));
1491   PetscCall(ISGetIndices(colp, &cwant));
1492   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1493 
1494   /* Invert row permutation to find out where my rows should go */
1495   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1496   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1497   PetscCall(PetscSFSetFromOptions(rowsf));
1498   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1499   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1500   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
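  /* Example (single rank, hypothetical): with rwant = {2,0,1}, leaf i sends its global row
     number work[i] = i to root rwant[i], giving rdest = {1,2,0}; that is, old row i must
     move to new row rdest[i]. */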
1501 
1502   /* Invert column permutation to find out where my columns should go */
1503   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1504   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1505   PetscCall(PetscSFSetFromOptions(sf));
1506   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1507   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1508   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1509   PetscCall(PetscSFDestroy(&sf));
1510 
1511   PetscCall(ISRestoreIndices(rowp, &rwant));
1512   PetscCall(ISRestoreIndices(colp, &cwant));
1513   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1514 
1515   /* Find out where my gcols should go */
1516   PetscCall(MatGetSize(aB, NULL, &ng));
1517   PetscCall(PetscMalloc1(ng, &gcdest));
1518   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1519   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1520   PetscCall(PetscSFSetFromOptions(sf));
1521   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1522   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1523   PetscCall(PetscSFDestroy(&sf));
1524 
1525   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1526   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1527   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1528   for (i = 0; i < m; i++) {
1529     PetscInt    row = rdest[i];
1530     PetscMPIInt rowner;
1531     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1532     for (j = ai[i]; j < ai[i + 1]; j++) {
1533       PetscInt    col = cdest[aj[j]];
1534       PetscMPIInt cowner;
1535       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1536       if (rowner == cowner) dnnz[i]++;
1537       else onnz[i]++;
1538     }
1539     for (j = bi[i]; j < bi[i + 1]; j++) {
1540       PetscInt    col = gcdest[bj[j]];
1541       PetscMPIInt cowner;
1542       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1543       if (rowner == cowner) dnnz[i]++;
1544       else onnz[i]++;
1545     }
1546   }
1547   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1548   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1551   PetscCall(PetscSFDestroy(&rowsf));
1552 
1553   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1554   PetscCall(MatSeqAIJGetArray(aA, &aa));
1555   PetscCall(MatSeqAIJGetArray(aB, &ba));
1556   for (i = 0; i < m; i++) {
1557     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1558     PetscInt  j0, rowlen;
1559     rowlen = ai[i + 1] - ai[i];
1560     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1561       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1562       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1563     }
1564     rowlen = bi[i + 1] - bi[i];
1565     for (j0 = j = 0; j < rowlen; j0 = j) {
1566       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1567       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1568     }
1569   }
1570   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1571   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1572   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1573   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1574   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1575   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1576   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1577   PetscCall(PetscFree3(work, rdest, cdest));
1578   PetscCall(PetscFree(gcdest));
1579   if (parcolp) PetscCall(ISDestroy(&colp));
1580   *B = Aperm;
1581   PetscFunctionReturn(PETSC_SUCCESS);
1582 }
1583 
1584 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1585 {
1586   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1587 
1588   PetscFunctionBegin;
1589   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1590   if (ghosts) *ghosts = aij->garray;
1591   PetscFunctionReturn(PETSC_SUCCESS);
1592 }
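
/* Usage sketch: query the global indices of the ghost (off-process) columns referenced on
   this rank, i.e. the garray of the compressed off-diagonal block.

     PetscInt        nghosts;
     const PetscInt *ghosts;
     PetscCall(MatGetGhosts(A, &nghosts, &ghosts));
*/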
1593 
1594 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1595 {
1596   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1597   Mat            A = mat->A, B = mat->B;
1598   PetscLogDouble isend[5], irecv[5];
1599 
1600   PetscFunctionBegin;
1601   info->block_size = 1.0;
1602   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1603 
1604   isend[0] = info->nz_used;
1605   isend[1] = info->nz_allocated;
1606   isend[2] = info->nz_unneeded;
1607   isend[3] = info->memory;
1608   isend[4] = info->mallocs;
1609 
1610   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1611 
1612   isend[0] += info->nz_used;
1613   isend[1] += info->nz_allocated;
1614   isend[2] += info->nz_unneeded;
1615   isend[3] += info->memory;
1616   isend[4] += info->mallocs;
1617   if (flag == MAT_LOCAL) {
1618     info->nz_used      = isend[0];
1619     info->nz_allocated = isend[1];
1620     info->nz_unneeded  = isend[2];
1621     info->memory       = isend[3];
1622     info->mallocs      = isend[4];
1623   } else if (flag == MAT_GLOBAL_MAX) {
1624     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1625 
1626     info->nz_used      = irecv[0];
1627     info->nz_allocated = irecv[1];
1628     info->nz_unneeded  = irecv[2];
1629     info->memory       = irecv[3];
1630     info->mallocs      = irecv[4];
1631   } else if (flag == MAT_GLOBAL_SUM) {
1632     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1633 
1634     info->nz_used      = irecv[0];
1635     info->nz_allocated = irecv[1];
1636     info->nz_unneeded  = irecv[2];
1637     info->memory       = irecv[3];
1638     info->mallocs      = irecv[4];
1639   }
1640   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1641   info->fill_ratio_needed = 0;
1642   info->factor_mallocs    = 0;
1643   PetscFunctionReturn(PETSC_SUCCESS);
1644 }
1645 
1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1647 {
1648   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1649 
1650   PetscFunctionBegin;
1651   switch (op) {
1652   case MAT_NEW_NONZERO_LOCATIONS:
1653   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1654   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1655   case MAT_KEEP_NONZERO_PATTERN:
1656   case MAT_NEW_NONZERO_LOCATION_ERR:
1657   case MAT_USE_INODES:
1658   case MAT_IGNORE_ZERO_ENTRIES:
1659   case MAT_FORM_EXPLICIT_TRANSPOSE:
1660     MatCheckPreallocated(A, 1);
1661     PetscCall(MatSetOption(a->A, op, flg));
1662     PetscCall(MatSetOption(a->B, op, flg));
1663     break;
1664   case MAT_ROW_ORIENTED:
1665     MatCheckPreallocated(A, 1);
1666     a->roworiented = flg;
1667 
1668     PetscCall(MatSetOption(a->A, op, flg));
1669     PetscCall(MatSetOption(a->B, op, flg));
1670     break;
1671   case MAT_FORCE_DIAGONAL_ENTRIES:
1672   case MAT_SORTED_FULL:
1673     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1674     break;
1675   case MAT_IGNORE_OFF_PROC_ENTRIES:
1676     a->donotstash = flg;
1677     break;
1678   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1679   case MAT_SPD:
1680   case MAT_SYMMETRIC:
1681   case MAT_STRUCTURALLY_SYMMETRIC:
1682   case MAT_HERMITIAN:
1683   case MAT_SYMMETRY_ETERNAL:
1684   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1685   case MAT_SPD_ETERNAL:
1686     /* if the diagonal block is square it inherits some of the properties above */
1687     break;
1688   case MAT_SUBMAT_SINGLEIS:
1689     A->submat_singleis = flg;
1690     break;
1691   case MAT_STRUCTURE_ONLY:
1692     /* The option is handled directly by MatSetOption() */
1693     break;
1694   default:
1695     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1696   }
1697   PetscFunctionReturn(PETSC_SUCCESS);
1698 }
1699 
1700 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1701 {
1702   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1703   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1704   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1705   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1706   PetscInt    *cmap, *idx_p;
1707 
1708   PetscFunctionBegin;
1709   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1710   mat->getrowactive = PETSC_TRUE;
1711 
1712   if (!mat->rowvalues && (idx || v)) {
1713     /*
1714         allocate enough space to hold information from the longest row.
1715     */
1716     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1717     PetscInt    max = 1, tmp;
1718     for (i = 0; i < matin->rmap->n; i++) {
1719       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1720       if (max < tmp) max = tmp;
1721     }
1722     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1723   }
1724 
1725   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1726   lrow = row - rstart;
1727 
1728   pvA = &vworkA;
1729   pcA = &cworkA;
1730   pvB = &vworkB;
1731   pcB = &cworkB;
1732   if (!v) {
1733     pvA = NULL;
1734     pvB = NULL;
1735   }
1736   if (!idx) {
1737     pcA = NULL;
1738     if (!v) pcB = NULL;
1739   }
1740   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1741   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1742   nztot = nzA + nzB;
1743 
1744   cmap = mat->garray;
1745   if (v || idx) {
1746     if (nztot) {
1747       /* Sort by increasing column numbers, assuming A and B already sorted */
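      /* Three-way merge: B's entries with global column below cstart come first (imark counts
         them), then all of A's entries shifted by cstart, then B's remaining entries. */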
1748       PetscInt imark = -1;
1749       if (v) {
1750         *v = v_p = mat->rowvalues;
1751         for (i = 0; i < nzB; i++) {
1752           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1753           else break;
1754         }
1755         imark = i;
1756         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1757         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1758       }
1759       if (idx) {
1760         *idx = idx_p = mat->rowindices;
1761         if (imark > -1) {
1762           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1763         } else {
1764           for (i = 0; i < nzB; i++) {
1765             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1766             else break;
1767           }
1768           imark = i;
1769         }
1770         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1771         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1772       }
1773     } else {
1774       if (idx) *idx = NULL;
1775       if (v) *v = NULL;
1776     }
1777   }
1778   *nz = nztot;
1779   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1780   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1781   PetscFunctionReturn(PETSC_SUCCESS);
1782 }
1783 
1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1785 {
1786   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1787 
1788   PetscFunctionBegin;
1789   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1790   aij->getrowactive = PETSC_FALSE;
1791   PetscFunctionReturn(PETSC_SUCCESS);
1792 }
1793 
1794 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1795 {
1796   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1797   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1798   PetscInt         i, j, cstart = mat->cmap->rstart;
1799   PetscReal        sum = 0.0;
1800   const MatScalar *v, *amata, *bmata;
1801 
1802   PetscFunctionBegin;
1803   if (aij->size == 1) {
1804     PetscCall(MatNorm(aij->A, type, norm));
1805   } else {
1806     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1807     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1808     if (type == NORM_FROBENIUS) {
1809       v = amata;
1810       for (i = 0; i < amat->nz; i++) {
1811         sum += PetscRealPart(PetscConj(*v) * (*v));
1812         v++;
1813       }
1814       v = bmata;
1815       for (i = 0; i < bmat->nz; i++) {
1816         sum += PetscRealPart(PetscConj(*v) * (*v));
1817         v++;
1818       }
1819       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1820       *norm = PetscSqrtReal(*norm);
1821       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1822     } else if (type == NORM_1) { /* max column sum: ||A||_1 = max_j sum_i |a_ij| */
1823       PetscReal *tmp, *tmp2;
1824       PetscInt  *jj, *garray = aij->garray;
1825       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1826       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1827       *norm = 0.0;
1828       v     = amata;
1829       jj    = amat->j;
1830       for (j = 0; j < amat->nz; j++) {
1831         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1832         v++;
1833       }
1834       v  = bmata;
1835       jj = bmat->j;
1836       for (j = 0; j < bmat->nz; j++) {
1837         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1838         v++;
1839       }
1840       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1841       for (j = 0; j < mat->cmap->N; j++) {
1842         if (tmp2[j] > *norm) *norm = tmp2[j];
1843       }
1844       PetscCall(PetscFree(tmp));
1845       PetscCall(PetscFree(tmp2));
1846       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1847     } else if (type == NORM_INFINITY) { /* max row sum: ||A||_inf = max_i sum_j |a_ij| */
1848       PetscReal ntemp = 0.0;
1849       for (j = 0; j < aij->A->rmap->n; j++) {
1850         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1851         sum = 0.0;
1852         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1853           sum += PetscAbsScalar(*v);
1854           v++;
1855         }
1856         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1857         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v);
1859           v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1864       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1865     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1866     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1867     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1868   }
1869   PetscFunctionReturn(PETSC_SUCCESS);
1870 }
1871 
1872 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1873 {
1874   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1875   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1876   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1877   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1878   Mat              B, A_diag, *B_diag;
1879   const MatScalar *pbv, *bv;
1880 
1881   PetscFunctionBegin;
1882   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1883   ma = A->rmap->n;
1884   na = A->cmap->n;
1885   mb = a->B->rmap->n;
1886   nb = a->B->cmap->n;
1887   ai = Aloc->i;
1888   aj = Aloc->j;
1889   bi = Bloc->i;
1890   bj = Bloc->j;
1891   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1892     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1893     PetscSFNode         *oloc;
1894     PETSC_UNUSED PetscSF sf;
1895 
1896     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1897     /* compute d_nnz for preallocation */
1898     PetscCall(PetscArrayzero(d_nnz, na));
1899     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1900     /* compute local off-diagonal contributions */
1901     PetscCall(PetscArrayzero(g_nnz, nb));
1902     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1903     /* map those to global */
1904     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1905     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1906     PetscCall(PetscSFSetFromOptions(sf));
1907     PetscCall(PetscArrayzero(o_nnz, na));
1908     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1909     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1910     PetscCall(PetscSFDestroy(&sf));
1911 
1912     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1913     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1914     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1915     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1916     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1917     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1918   } else {
1919     B = *matout;
1920     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1921   }
1922 
1923   b           = (Mat_MPIAIJ *)B->data;
1924   A_diag      = a->A;
1925   B_diag      = &b->A;
1926   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1927   A_diag_ncol = A_diag->cmap->N;
1928   B_diag_ilen = sub_B_diag->ilen;
1929   B_diag_i    = sub_B_diag->i;
1930 
1931   /* Set ilen for diagonal of B */
1932   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1933 
1934   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1935   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1936   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1937   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1938 
1939   /* copy over the off-diagonal B part: each row of B becomes a column of the transpose, inserted with MatSetValues() since the destination rows generally live on other ranks */
1940   PetscCall(PetscMalloc1(bi[mb], &cols));
1941   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1942   pbv = bv;
1943   row = A->rmap->rstart;
1944   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1945   cols_tmp = cols;
1946   for (i = 0; i < mb; i++) {
1947     ncol = bi[i + 1] - bi[i];
1948     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1949     row++;
1950     if (pbv) pbv += ncol;
1951     if (cols_tmp) cols_tmp += ncol;
1952   }
1953   PetscCall(PetscFree(cols));
1954   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1955 
1956   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1957   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1958   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1959     *matout = B;
1960   } else {
1961     PetscCall(MatHeaderMerge(A, &B));
1962   }
1963   PetscFunctionReturn(PETSC_SUCCESS);
1964 }
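
/* Usage sketch: callers reach the routine above through MatTranspose(); MAT_INITIAL_MATRIX
   creates a new matrix, while MAT_INPLACE_MATRIX overwrites A via MatHeaderMerge().

     Mat At;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
     PetscCall(MatTranspose(A, MAT_INPLACE_MATRIX, &A));
*/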
1965 
1966 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1967 {
1968   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1969   Mat         a = aij->A, b = aij->B;
1970   PetscInt    s1, s2, s3;
1971 
1972   PetscFunctionBegin;
1973   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1974   if (rr) {
1975     PetscCall(VecGetLocalSize(rr, &s1));
1976     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1977     /* Overlap communication with computation. */
1978     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1979   }
1980   if (ll) {
1981     PetscCall(VecGetLocalSize(ll, &s1));
1982     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1983     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1984   }
1985   /* scale the diagonal block */
1986   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1987 
1988   if (rr) {
1989     /* Do a scatter end and then right scale the off-diagonal block */
1990     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1991     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1992   }
1993   PetscFunctionReturn(PETSC_SUCCESS);
1994 }
1995 
1996 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1997 {
1998   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1999 
2000   PetscFunctionBegin;
2001   PetscCall(MatSetUnfactored(a->A));
2002   PetscFunctionReturn(PETSC_SUCCESS);
2003 }
2004 
2005 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2006 {
2007   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2008   Mat         a, b, c, d;
2009   PetscBool   flg;
2010 
2011   PetscFunctionBegin;
2012   a = matA->A;
2013   b = matA->B;
2014   c = matB->A;
2015   d = matB->B;
2016 
2017   PetscCall(MatEqual(a, c, &flg));
2018   if (flg) PetscCall(MatEqual(b, d, &flg));
2019   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2020   PetscFunctionReturn(PETSC_SUCCESS);
2021 }
2022 
2023 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2024 {
2025   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2026   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2027 
2028   PetscFunctionBegin;
2029   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2030   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2031     /* because of the column compression in the off-processor part of the matrix a->B,
2032        the number of columns in a->B and b->B may be different, hence we cannot call
2033        the MatCopy() directly on the two parts. If need be, we can provide a more
2034        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2035        then copying the submatrices */
2036     PetscCall(MatCopy_Basic(A, B, str));
2037   } else {
2038     PetscCall(MatCopy(a->A, b->A, str));
2039     PetscCall(MatCopy(a->B, b->B, str));
2040   }
2041   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2042   PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044 
2045 /*
2046    Computes the number of nonzeros per row needed for preallocation when X and Y
2047    have different nonzero structure.
2048 */
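/*
   Example (hypothetical row): if X's columns map to global indices {0,3,5} and Y's map to
   {1,3,6}, the merged pattern is {0,1,3,5,6}, so nnz[] for that row is 5; the shared
   column 3 is counted only once.
*/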
2049 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2050 {
2051   PetscInt i, j, k, nzx, nzy;
2052 
2053   PetscFunctionBegin;
2054   /* Set the number of nonzeros in the new matrix */
2055   for (i = 0; i < m; i++) {
2056     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2057     nzx    = xi[i + 1] - xi[i];
2058     nzy    = yi[i + 1] - yi[i];
2059     nnz[i] = 0;
2060     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2061       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2062       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2063       nnz[i]++;
2064     }
2065     for (; k < nzy; k++) nnz[i]++;
2066   }
2067   PetscFunctionReturn(PETSC_SUCCESS);
2068 }
2069 
2070 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2071 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2072 {
2073   PetscInt    m = Y->rmap->N;
2074   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2075   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2076 
2077   PetscFunctionBegin;
2078   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2079   PetscFunctionReturn(PETSC_SUCCESS);
2080 }
2081 
2082 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2083 {
2084   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2085 
2086   PetscFunctionBegin;
2087   if (str == SAME_NONZERO_PATTERN) {
2088     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2089     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2090   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2091     PetscCall(MatAXPY_Basic(Y, a, X, str));
2092   } else {
2093     Mat       B;
2094     PetscInt *nnz_d, *nnz_o;
2095 
2096     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2097     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2098     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2099     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2100     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2101     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2102     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2103     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2104     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2105     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2106     PetscCall(MatHeaderMerge(Y, &B));
2107     PetscCall(PetscFree(nnz_d));
2108     PetscCall(PetscFree(nnz_o));
2109   }
2110   PetscFunctionReturn(PETSC_SUCCESS);
2111 }
2112 
2113 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2114 
2115 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2116 {
2117   PetscFunctionBegin;
2118   if (PetscDefined(USE_COMPLEX)) {
2119     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2120 
2121     PetscCall(MatConjugate_SeqAIJ(aij->A));
2122     PetscCall(MatConjugate_SeqAIJ(aij->B));
2123   }
2124   PetscFunctionReturn(PETSC_SUCCESS);
2125 }
2126 
2127 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2128 {
2129   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2130 
2131   PetscFunctionBegin;
2132   PetscCall(MatRealPart(a->A));
2133   PetscCall(MatRealPart(a->B));
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatImaginaryPart(a->A));
2143   PetscCall(MatImaginaryPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2148 {
2149   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2150   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2151   PetscScalar       *va, *vv;
2152   Vec                vB, vA;
2153   const PetscScalar *vb;
2154 
2155   PetscFunctionBegin;
2156   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2157   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2158 
2159   PetscCall(VecGetArrayWrite(vA, &va));
2160   if (idx) {
2161     for (i = 0; i < m; i++) {
2162       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2163     }
2164   }
2165 
2166   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2167   PetscCall(PetscMalloc1(m, &idxb));
2168   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2169 
2170   PetscCall(VecGetArrayWrite(v, &vv));
2171   PetscCall(VecGetArrayRead(vB, &vb));
2172   for (i = 0; i < m; i++) {
2173     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2174       vv[i] = vb[i];
2175       if (idx) idx[i] = a->garray[idxb[i]];
2176     } else {
2177       vv[i] = va[i];
2178       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2179     }
2180   }
2181   PetscCall(VecRestoreArrayWrite(v, &vv));
2182   PetscCall(VecRestoreArrayWrite(vA, &va));
2183   PetscCall(VecRestoreArrayRead(vB, &vb));
2184   PetscCall(PetscFree(idxb));
2185   PetscCall(VecDestroy(&vA));
2186   PetscCall(VecDestroy(&vB));
2187   PetscFunctionReturn(PETSC_SUCCESS);
2188 }
2189 
2190 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2191 {
2192   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2193   Vec         vB, vA;
2194 
2195   PetscFunctionBegin;
2196   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2197   PetscCall(MatGetRowSumAbs(a->A, vA));
2198   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2199   PetscCall(MatGetRowSumAbs(a->B, vB));
2200   PetscCall(VecAXPY(vA, 1.0, vB));
2201   PetscCall(VecDestroy(&vB));
2202   PetscCall(VecCopy(vA, v));
2203   PetscCall(VecDestroy(&vA));
2204   PetscFunctionReturn(PETSC_SUCCESS);
2205 }
2206 
2207 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2210   PetscInt           m = A->rmap->n, n = A->cmap->n;
2211   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2212   PetscInt          *cmap = mat->garray;
2213   PetscInt          *diagIdx, *offdiagIdx;
2214   Vec                diagV, offdiagV;
2215   PetscScalar       *a, *diagA, *offdiagA;
2216   const PetscScalar *ba, *bav;
2217   PetscInt           r, j, col, ncols, *bi, *bj;
2218   Mat                B = mat->B;
2219   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2220 
2221   PetscFunctionBegin;
2222   /* When one process holds the entire matrix and the other processes have no entries */
2223   if (A->cmap->N == n) {
2224     PetscCall(VecGetArrayWrite(v, &diagA));
2225     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2226     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2227     PetscCall(VecDestroy(&diagV));
2228     PetscCall(VecRestoreArrayWrite(v, &diagA));
2229     PetscFunctionReturn(PETSC_SUCCESS);
2230   } else if (n == 0) {
2231     if (m) {
2232       PetscCall(VecGetArrayWrite(v, &a));
2233       for (r = 0; r < m; r++) {
2234         a[r] = 0.0;
2235         if (idx) idx[r] = -1;
2236       }
2237       PetscCall(VecRestoreArrayWrite(v, &a));
2238     }
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   }
2241 
2242   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2243   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2244   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2245   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2246 
2247   /* Get offdiagIdx[] for implicit 0.0 */
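  /* Example (hypothetical): with cstart = 4, n = 2, and a row of B whose entries map to global
     columns {0,1,3}, the scan below stops at j = 2 where col = 3 > j, so the first implicit
     zero sits at global column 2; indices at or beyond the diagonal block are offset by n. */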
2248   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2249   ba = bav;
2250   bi = b->i;
2251   bj = b->j;
2252   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2253   for (r = 0; r < m; r++) {
2254     ncols = bi[r + 1] - bi[r];
2255     if (ncols == A->cmap->N - n) { /* Brow is dense */
2256       offdiagA[r]   = *ba;
2257       offdiagIdx[r] = cmap[0];
2258     } else { /* Brow is sparse, so an implicit 0.0 entry already attains the row's minimum absolute value */
2259       offdiagA[r] = 0.0;
2260 
2261       /* Find first hole in the cmap */
2262       for (j = 0; j < ncols; j++) {
2263         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2264         if (col > j && j < cstart) {
2265           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2266           break;
2267         } else if (col > j + n && j >= cstart) {
2268           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2269           break;
2270         }
2271       }
2272       if (j == ncols && ncols < A->cmap->N - n) {
2273         /* a hole is outside compressed Bcols */
2274         if (ncols == 0) {
2275           if (cstart) {
2276             offdiagIdx[r] = 0;
2277           } else offdiagIdx[r] = cend;
2278         } else { /* ncols > 0 */
2279           offdiagIdx[r] = cmap[ncols - 1] + 1;
2280           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2281         }
2282       }
2283     }
2284 
2285     for (j = 0; j < ncols; j++) {
2286       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2287         offdiagA[r]   = *ba;
2288         offdiagIdx[r] = cmap[*bj];
2289       }
2290       ba++;
2291       bj++;
2292     }
2293   }
2294 
2295   PetscCall(VecGetArrayWrite(v, &a));
2296   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2297   for (r = 0; r < m; ++r) {
2298     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2299       a[r] = diagA[r];
2300       if (idx) idx[r] = cstart + diagIdx[r];
2301     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2302       a[r] = diagA[r];
2303       if (idx) {
2304         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2305           idx[r] = cstart + diagIdx[r];
2306         } else idx[r] = offdiagIdx[r];
2307       }
2308     } else {
2309       a[r] = offdiagA[r];
2310       if (idx) idx[r] = offdiagIdx[r];
2311     }
2312   }
2313   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2314   PetscCall(VecRestoreArrayWrite(v, &a));
2315   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2316   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2317   PetscCall(VecDestroy(&diagV));
2318   PetscCall(VecDestroy(&offdiagV));
2319   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2320   PetscFunctionReturn(PETSC_SUCCESS);
2321 }
2322 
2323 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2324 {
2325   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2326   PetscInt           m = A->rmap->n, n = A->cmap->n;
2327   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2328   PetscInt          *cmap = mat->garray;
2329   PetscInt          *diagIdx, *offdiagIdx;
2330   Vec                diagV, offdiagV;
2331   PetscScalar       *a, *diagA, *offdiagA;
2332   const PetscScalar *ba, *bav;
2333   PetscInt           r, j, col, ncols, *bi, *bj;
2334   Mat                B = mat->B;
2335   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2336 
2337   PetscFunctionBegin;
2338   /* When one process holds the entire matrix and the other processes have no entries */
2339   if (A->cmap->N == n) {
2340     PetscCall(VecGetArrayWrite(v, &diagA));
2341     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2342     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2343     PetscCall(VecDestroy(&diagV));
2344     PetscCall(VecRestoreArrayWrite(v, &diagA));
2345     PetscFunctionReturn(PETSC_SUCCESS);
2346   } else if (n == 0) {
2347     if (m) {
2348       PetscCall(VecGetArrayWrite(v, &a));
2349       for (r = 0; r < m; r++) {
2350         a[r] = PETSC_MAX_REAL;
2351         if (idx) idx[r] = -1;
2352       }
2353       PetscCall(VecRestoreArrayWrite(v, &a));
2354     }
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   }
2357 
2358   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2359   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2360   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2361   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2362 
2363   /* Get offdiagIdx[] for implicit 0.0 */
2364   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2365   ba = bav;
2366   bi = b->i;
2367   bj = b->j;
2368   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2369   for (r = 0; r < m; r++) {
2370     ncols = bi[r + 1] - bi[r];
2371     if (ncols == A->cmap->N - n) { /* Brow is dense */
2372       offdiagA[r]   = *ba;
2373       offdiagIdx[r] = cmap[0];
2374     } else { /* Brow is sparse, so an implicit 0.0 bounds the row minimum from above */
2375       offdiagA[r] = 0.0;
2376 
2377       /* Find first hole in the cmap */
2378       for (j = 0; j < ncols; j++) {
2379         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2380         if (col > j && j < cstart) {
2381           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2382           break;
2383         } else if (col > j + n && j >= cstart) {
2384           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2385           break;
2386         }
2387       }
2388       if (j == ncols && ncols < A->cmap->N - n) {
2389         /* a hole is outside compressed Bcols */
2390         if (ncols == 0) {
2391           if (cstart) {
2392             offdiagIdx[r] = 0;
2393           } else offdiagIdx[r] = cend;
2394         } else { /* ncols > 0 */
2395           offdiagIdx[r] = cmap[ncols - 1] + 1;
2396           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2397         }
2398       }
2399     }
2400 
2401     for (j = 0; j < ncols; j++) {
2402       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2403         offdiagA[r]   = *ba;
2404         offdiagIdx[r] = cmap[*bj];
2405       }
2406       ba++;
2407       bj++;
2408     }
2409   }
2410 
2411   PetscCall(VecGetArrayWrite(v, &a));
2412   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2413   for (r = 0; r < m; ++r) {
2414     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2415       a[r] = diagA[r];
2416       if (idx) idx[r] = cstart + diagIdx[r];
2417     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2418       a[r] = diagA[r];
2419       if (idx) {
2420         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2421           idx[r] = cstart + diagIdx[r];
2422         } else idx[r] = offdiagIdx[r];
2423       }
2424     } else {
2425       a[r] = offdiagA[r];
2426       if (idx) idx[r] = offdiagIdx[r];
2427     }
2428   }
2429   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2430   PetscCall(VecRestoreArrayWrite(v, &a));
2431   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2432   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2433   PetscCall(VecDestroy(&diagV));
2434   PetscCall(VecDestroy(&offdiagV));
2435   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2436   PetscFunctionReturn(PETSC_SUCCESS);
2437 }
2438 
2439 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2440 {
2441   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2442   PetscInt           m = A->rmap->n, n = A->cmap->n;
2443   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2444   PetscInt          *cmap = mat->garray;
2445   PetscInt          *diagIdx, *offdiagIdx;
2446   Vec                diagV, offdiagV;
2447   PetscScalar       *a, *diagA, *offdiagA;
2448   const PetscScalar *ba, *bav;
2449   PetscInt           r, j, col, ncols, *bi, *bj;
2450   Mat                B = mat->B;
2451   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2452 
2453   PetscFunctionBegin;
2454   /* When one process holds the entire matrix and the other processes have no entries */
2455   if (A->cmap->N == n) {
2456     PetscCall(VecGetArrayWrite(v, &diagA));
2457     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2458     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2459     PetscCall(VecDestroy(&diagV));
2460     PetscCall(VecRestoreArrayWrite(v, &diagA));
2461     PetscFunctionReturn(PETSC_SUCCESS);
2462   } else if (n == 0) {
2463     if (m) {
2464       PetscCall(VecGetArrayWrite(v, &a));
2465       for (r = 0; r < m; r++) {
2466         a[r] = PETSC_MIN_REAL;
2467         if (idx) idx[r] = -1;
2468       }
2469       PetscCall(VecRestoreArrayWrite(v, &a));
2470     }
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   }
2473 
2474   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2475   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2476   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2477   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2478 
2479   /* Get offdiagIdx[] for implicit 0.0 */
2480   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2481   ba = bav;
2482   bi = b->i;
2483   bj = b->j;
2484   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2485   for (r = 0; r < m; r++) {
2486     ncols = bi[r + 1] - bi[r];
2487     if (ncols == A->cmap->N - n) { /* Brow is dense */
2488       offdiagA[r]   = *ba;
2489       offdiagIdx[r] = cmap[0];
2490     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2491       offdiagA[r] = 0.0;
2492 
2493       /* Find first hole in the cmap */
2494       for (j = 0; j < ncols; j++) {
2495         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2496         if (col > j && j < cstart) {
2497           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2498           break;
2499         } else if (col > j + n && j >= cstart) {
2500           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2501           break;
2502         }
2503       }
2504       if (j == ncols && ncols < A->cmap->N - n) {
2505         /* a hole is outside compressed Bcols */
2506         if (ncols == 0) {
2507           if (cstart) {
2508             offdiagIdx[r] = 0;
2509           } else offdiagIdx[r] = cend;
2510         } else { /* ncols > 0 */
2511           offdiagIdx[r] = cmap[ncols - 1] + 1;
2512           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2513         }
2514       }
2515     }
2516 
2517     for (j = 0; j < ncols; j++) {
2518       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2519         offdiagA[r]   = *ba;
2520         offdiagIdx[r] = cmap[*bj];
2521       }
2522       ba++;
2523       bj++;
2524     }
2525   }
2526 
2527   PetscCall(VecGetArrayWrite(v, &a));
2528   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2529   for (r = 0; r < m; ++r) {
2530     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2531       a[r] = diagA[r];
2532       if (idx) idx[r] = cstart + diagIdx[r];
2533     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2534       a[r] = diagA[r];
2535       if (idx) {
2536         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2537           idx[r] = cstart + diagIdx[r];
2538         } else idx[r] = offdiagIdx[r];
2539       }
2540     } else {
2541       a[r] = offdiagA[r];
2542       if (idx) idx[r] = offdiagIdx[r];
2543     }
2544   }
2545   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2546   PetscCall(VecRestoreArrayWrite(v, &a));
2547   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2548   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2549   PetscCall(VecDestroy(&diagV));
2550   PetscCall(VecDestroy(&offdiagV));
2551   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2552   PetscFunctionReturn(PETSC_SUCCESS);
2553 }
2554 
2555 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2556 {
2557   Mat *dummy;
2558 
2559   PetscFunctionBegin;
2560   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2561   *newmat = *dummy;
2562   PetscCall(PetscFree(dummy));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2567 {
2568   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatInvertBlockDiagonal(a->A, values));
2572   A->factorerrortype = a->A->factorerrortype;
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2577 {
2578   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2582   PetscCall(MatSetRandom(aij->A, rctx));
2583   if (x->assembled) {
2584     PetscCall(MatSetRandom(aij->B, rctx));
2585   } else {
2586     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2587   }
2588   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2589   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2590   PetscFunctionReturn(PETSC_SUCCESS);
2591 }
2592 
2593 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2594 {
2595   PetscFunctionBegin;
2596   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2597   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2598   PetscFunctionReturn(PETSC_SUCCESS);
2599 }
2600 
2601 /*@
2602   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2603 
2604   Not Collective
2605 
2606   Input Parameter:
2607 . A - the matrix
2608 
2609   Output Parameter:
2610 . nz - the number of nonzeros
2611 
2612   Level: advanced
2613 
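  Example Usage:
  A minimal sketch, not part of the original manual page; `A` is assumed to be an assembled `MATMPIAIJ`
.vb
  PetscCount nz;

  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscPrintf(PETSC_COMM_SELF, "stored nonzeros on this rank: %" PetscInt64_FMT "\n", (PetscInt64)nz));
.ve
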
2614 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2615 @*/
2616 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2617 {
2618   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2619   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2620   PetscBool   isaij;
2621 
2622   PetscFunctionBegin;
2623   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2624   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2625   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2626   PetscFunctionReturn(PETSC_SUCCESS);
2627 }
2628 
2629 /*@
2630   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2631 
2632   Collective
2633 
2634   Input Parameters:
2635 + A  - the matrix
2636 - sc - `PETSC_TRUE` indicates that the scalable algorithm should be used (by default the non-scalable algorithm is used)
2637 
2638   Level: advanced
2639 
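  Example Usage:
  A minimal sketch, not part of the original manual page; `A` is assumed to be a `MATMPIAIJ` later passed to `MatIncreaseOverlap()`
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
.ve
  The same effect is available at runtime through the option -mat_increase_overlap_scalable handled in MatSetFromOptions_MPIAIJ() below.
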
2640 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2641 @*/
2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2643 {
2644   PetscFunctionBegin;
2645   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2646   PetscFunctionReturn(PETSC_SUCCESS);
2647 }
2648 
2649 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2650 {
2651   PetscBool sc = PETSC_FALSE, flg;
2652 
2653   PetscFunctionBegin;
2654   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2655   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2656   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2657   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2658   PetscOptionsHeadEnd();
2659   PetscFunctionReturn(PETSC_SUCCESS);
2660 }
2661 
2662 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2663 {
2664   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2665   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2666 
2667   PetscFunctionBegin;
2668   if (!Y->preallocated) {
2669     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2670   } else if (!aij->nz) { /* It does not matter if the diagonal entries of Y only partially lie in maij->A; we just need an estimate for the preallocation. */
2671     PetscInt nonew = aij->nonew;
2672     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2673     aij->nonew = nonew;
2674   }
2675   PetscCall(MatShift_Basic(Y, a));
2676   PetscFunctionReturn(PETSC_SUCCESS);
2677 }
2678 
2679 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2680 {
2681   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2682 
2683   PetscFunctionBegin;
2684   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2685   PetscCall(MatMissingDiagonal(a->A, missing, d));
2686   if (d) {
2687     PetscInt rstart;
2688     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2689     *d += rstart;
2690   }
2691   PetscFunctionReturn(PETSC_SUCCESS);
2692 }
2693 
2694 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2695 {
2696   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2697 
2698   PetscFunctionBegin;
2699   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2700   PetscFunctionReturn(PETSC_SUCCESS);
2701 }
2702 
2703 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2704 {
2705   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2706 
2707   PetscFunctionBegin;
2708   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2709   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2714                                        MatGetRow_MPIAIJ,
2715                                        MatRestoreRow_MPIAIJ,
2716                                        MatMult_MPIAIJ,
2717                                        /* 4*/ MatMultAdd_MPIAIJ,
2718                                        MatMultTranspose_MPIAIJ,
2719                                        MatMultTransposeAdd_MPIAIJ,
2720                                        NULL,
2721                                        NULL,
2722                                        NULL,
2723                                        /*10*/ NULL,
2724                                        NULL,
2725                                        NULL,
2726                                        MatSOR_MPIAIJ,
2727                                        MatTranspose_MPIAIJ,
2728                                        /*15*/ MatGetInfo_MPIAIJ,
2729                                        MatEqual_MPIAIJ,
2730                                        MatGetDiagonal_MPIAIJ,
2731                                        MatDiagonalScale_MPIAIJ,
2732                                        MatNorm_MPIAIJ,
2733                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2734                                        MatAssemblyEnd_MPIAIJ,
2735                                        MatSetOption_MPIAIJ,
2736                                        MatZeroEntries_MPIAIJ,
2737                                        /*24*/ MatZeroRows_MPIAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                        NULL,
2742                                        /*29*/ MatSetUp_MPI_Hash,
2743                                        NULL,
2744                                        NULL,
2745                                        MatGetDiagonalBlock_MPIAIJ,
2746                                        NULL,
2747                                        /*34*/ MatDuplicate_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        /*39*/ MatAXPY_MPIAIJ,
2753                                        MatCreateSubMatrices_MPIAIJ,
2754                                        MatIncreaseOverlap_MPIAIJ,
2755                                        MatGetValues_MPIAIJ,
2756                                        MatCopy_MPIAIJ,
2757                                        /*44*/ MatGetRowMax_MPIAIJ,
2758                                        MatScale_MPIAIJ,
2759                                        MatShift_MPIAIJ,
2760                                        MatDiagonalSet_MPIAIJ,
2761                                        MatZeroRowsColumns_MPIAIJ,
2762                                        /*49*/ MatSetRandom_MPIAIJ,
2763                                        MatGetRowIJ_MPIAIJ,
2764                                        MatRestoreRowIJ_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2768                                        NULL,
2769                                        MatSetUnfactored_MPIAIJ,
2770                                        MatPermute_MPIAIJ,
2771                                        NULL,
2772                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2773                                        MatDestroy_MPIAIJ,
2774                                        MatView_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        /*64*/ NULL,
2778                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2779                                        NULL,
2780                                        NULL,
2781                                        NULL,
2782                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2783                                        MatGetRowMinAbs_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        /*75*/ MatFDColoringApply_AIJ,
2789                                        MatSetFromOptions_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        MatFindZeroDiagonals_MPIAIJ,
2793                                        /*80*/ NULL,
2794                                        NULL,
2795                                        NULL,
2796                                        /*83*/ MatLoad_MPIAIJ,
2797                                        NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        /*89*/ NULL,
2803                                        NULL,
2804                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2805                                        NULL,
2806                                        NULL,
2807                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        MatBindToCPU_MPIAIJ,
2812                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        MatConjugate_MPIAIJ,
2816                                        NULL,
2817                                        /*104*/ MatSetValuesRow_MPIAIJ,
2818                                        MatRealPart_MPIAIJ,
2819                                        MatImaginaryPart_MPIAIJ,
2820                                        NULL,
2821                                        NULL,
2822                                        /*109*/ NULL,
2823                                        NULL,
2824                                        MatGetRowMin_MPIAIJ,
2825                                        NULL,
2826                                        MatMissingDiagonal_MPIAIJ,
2827                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2828                                        NULL,
2829                                        MatGetGhosts_MPIAIJ,
2830                                        NULL,
2831                                        NULL,
2832                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2833                                        NULL,
2834                                        NULL,
2835                                        NULL,
2836                                        MatGetMultiProcBlock_MPIAIJ,
2837                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2838                                        MatGetColumnReductions_MPIAIJ,
2839                                        MatInvertBlockDiagonal_MPIAIJ,
2840                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2841                                        MatCreateSubMatricesMPI_MPIAIJ,
2842                                        /*129*/ NULL,
2843                                        NULL,
2844                                        NULL,
2845                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2846                                        NULL,
2847                                        /*134*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        NULL,
2851                                        NULL,
2852                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2853                                        NULL,
2854                                        NULL,
2855                                        MatFDColoringSetUp_MPIXAIJ,
2856                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2857                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2858                                        /*145*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        MatCreateGraph_Simple_AIJ,
2862                                        NULL,
2863                                        /*150*/ NULL,
2864                                        MatEliminateZeros_MPIAIJ,
2865                                        MatGetRowSumAbs_MPIAIJ};
2866 
2867 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2868 {
2869   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2870 
2871   PetscFunctionBegin;
2872   PetscCall(MatStoreValues(aij->A));
2873   PetscCall(MatStoreValues(aij->B));
2874   PetscFunctionReturn(PETSC_SUCCESS);
2875 }
2876 
2877 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2878 {
2879   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2880 
2881   PetscFunctionBegin;
2882   PetscCall(MatRetrieveValues(aij->A));
2883   PetscCall(MatRetrieveValues(aij->B));
2884   PetscFunctionReturn(PETSC_SUCCESS);
2885 }
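
/*
   Typical use of the store/retrieve pair above (a minimal sketch; A is a hypothetical assembled
   MATMPIAIJ whose nonzero pattern is fixed between solves):

     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
     PetscCall(MatStoreValues(A));
     // ... overwrite the values, e.g. with MatZeroEntries() and MatSetValues() ...
     PetscCall(MatRetrieveValues(A));
*/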
2886 
2887 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2888 {
2889   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2890   PetscMPIInt size;
2891 
2892   PetscFunctionBegin;
2893   if (B->hash_active) {
2894     B->ops[0]      = b->cops;
2895     B->hash_active = PETSC_FALSE;
2896   }
2897   PetscCall(PetscLayoutSetUp(B->rmap));
2898   PetscCall(PetscLayoutSetUp(B->cmap));
2899 
2900 #if defined(PETSC_USE_CTABLE)
2901   PetscCall(PetscHMapIDestroy(&b->colmap));
2902 #else
2903   PetscCall(PetscFree(b->colmap));
2904 #endif
2905   PetscCall(PetscFree(b->garray));
2906   PetscCall(VecDestroy(&b->lvec));
2907   PetscCall(VecScatterDestroy(&b->Mvctx));
2908 
2909   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2910 
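  /* on more than one rank, b->B is created with the full global column count; during assembly
     its columns are compressed to just the off-process columns actually used, with their
     global indices recorded in b->garray */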
2911   MatSeqXAIJGetOptions_Private(b->B);
2912   PetscCall(MatDestroy(&b->B));
2913   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2914   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2915   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2916   PetscCall(MatSetType(b->B, MATSEQAIJ));
2917   MatSeqXAIJRestoreOptions_Private(b->B);
2918 
2919   MatSeqXAIJGetOptions_Private(b->A);
2920   PetscCall(MatDestroy(&b->A));
2921   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2922   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2923   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2924   PetscCall(MatSetType(b->A, MATSEQAIJ));
2925   MatSeqXAIJRestoreOptions_Private(b->A);
2926 
2927   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2928   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2929   B->preallocated  = PETSC_TRUE;
2930   B->was_assembled = PETSC_FALSE;
2931   B->assembled     = PETSC_FALSE;
2932   PetscFunctionReturn(PETSC_SUCCESS);
2933 }
2934 
2935 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2936 {
2937   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2938 
2939   PetscFunctionBegin;
2940   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2941   PetscCall(PetscLayoutSetUp(B->rmap));
2942   PetscCall(PetscLayoutSetUp(B->cmap));
2943 
2944 #if defined(PETSC_USE_CTABLE)
2945   PetscCall(PetscHMapIDestroy(&b->colmap));
2946 #else
2947   PetscCall(PetscFree(b->colmap));
2948 #endif
2949   PetscCall(PetscFree(b->garray));
2950   PetscCall(VecDestroy(&b->lvec));
2951   PetscCall(VecScatterDestroy(&b->Mvctx));
2952 
2953   PetscCall(MatResetPreallocation(b->A));
2954   PetscCall(MatResetPreallocation(b->B));
2955   B->preallocated  = PETSC_TRUE;
2956   B->was_assembled = PETSC_FALSE;
2957   B->assembled     = PETSC_FALSE;
2958   PetscFunctionReturn(PETSC_SUCCESS);
2959 }
2960 
2961 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2962 {
2963   Mat         mat;
2964   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2965 
2966   PetscFunctionBegin;
2967   *newmat = NULL;
2968   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2969   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2970   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2971   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2972   a = (Mat_MPIAIJ *)mat->data;
2973 
2974   mat->factortype = matin->factortype;
2975   mat->assembled  = matin->assembled;
2976   mat->insertmode = NOT_SET_VALUES;
2977 
2978   a->size         = oldmat->size;
2979   a->rank         = oldmat->rank;
2980   a->donotstash   = oldmat->donotstash;
2981   a->roworiented  = oldmat->roworiented;
2982   a->rowindices   = NULL;
2983   a->rowvalues    = NULL;
2984   a->getrowactive = PETSC_FALSE;
2985 
2986   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2987   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
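  /* if the original is still using hash-based MatSetValues() (no preallocation yet), set the copy up the same way */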
2988   if (matin->hash_active) {
2989     PetscCall(MatSetUp(mat));
2990   } else {
2991     mat->preallocated = matin->preallocated;
2992     if (oldmat->colmap) {
2993 #if defined(PETSC_USE_CTABLE)
2994       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2995 #else
2996       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
2997       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
2998 #endif
2999     } else a->colmap = NULL;
3000     if (oldmat->garray) {
3001       PetscInt len;
3002       len = oldmat->B->cmap->n;
3003       PetscCall(PetscMalloc1(len + 1, &a->garray));
3004       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3005     } else a->garray = NULL;
3006 
3007     /* MatDuplicate() may be called with a non-assembled matrix;
3008       in fact, MatDuplicate() only requires the matrix to be preallocated.
3009       This can happen, for example, inside a DMCreateMatrix_Shell */
3010     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3011     if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3012     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3013     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3014   }
3015   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3016   *newmat = mat;
3017   PetscFunctionReturn(PETSC_SUCCESS);
3018 }
3019 
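/*
   Typical calling sequence (a minimal sketch, not from the PETSc manual; "matrix.dat" is a
   hypothetical file written earlier by MatView() on a binary viewer):

     Mat         A;
     PetscViewer viewer;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/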
3020 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3021 {
3022   PetscBool isbinary, ishdf5;
3023 
3024   PetscFunctionBegin;
3025   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3026   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3027   /* force binary viewer to load .info file if it has not yet done so */
3028   PetscCall(PetscViewerSetUp(viewer));
3029   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3030   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3031   if (isbinary) {
3032     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3033   } else if (ishdf5) {
3034 #if defined(PETSC_HAVE_HDF5)
3035     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3036 #else
3037     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3038 #endif
3039   } else {
3040     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3041   }
3042   PetscFunctionReturn(PETSC_SUCCESS);
3043 }
3044 
3045 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3046 {
3047   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3048   PetscInt    *rowidxs, *colidxs;
3049   PetscScalar *matvals;
3050 
3051   PetscFunctionBegin;
3052   PetscCall(PetscViewerSetUp(viewer));
3053 
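  /* binary layout consumed below: a header [MAT_FILE_CLASSID, M, N, nz] followed by
     M row lengths (PetscInt), nz column indices (PetscInt), and nz values (PetscScalar) */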
3054   /* read in matrix header */
3055   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3056   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3057   M  = header[1];
3058   N  = header[2];
3059   nz = header[3];
3060   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3061   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3062   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3063 
3064   /* set block sizes from the viewer's .info file */
3065   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3066   /* set global sizes if not set already */
3067   if (mat->rmap->N < 0) mat->rmap->N = M;
3068   if (mat->cmap->N < 0) mat->cmap->N = N;
3069   PetscCall(PetscLayoutSetUp(mat->rmap));
3070   PetscCall(PetscLayoutSetUp(mat->cmap));
3071 
3072   /* check if the matrix sizes are correct */
3073   PetscCall(MatGetSize(mat, &rows, &cols));
3074   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3075 
3076   /* read in row lengths and build row indices */
3077   PetscCall(MatGetLocalSize(mat, &m, NULL));
3078   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3079   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3080   rowidxs[0] = 0;
3081   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3082   if (nz != PETSC_MAX_INT) {
3083     PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3084     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3085   }
3086 
3087   /* read in column indices and matrix values */
3088   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3089   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3090   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3091   /* store matrix indices and values */
3092   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3093   PetscCall(PetscFree(rowidxs));
3094   PetscCall(PetscFree2(colidxs, matvals));
3095   PetscFunctionReturn(PETSC_SUCCESS);
3096 }
3097 
3098 /* Not scalable because of ISAllGather() unless getting all columns. */
3099 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3100 {
3101   IS          iscol_local;
3102   PetscBool   isstride;
3103   PetscMPIInt lisstride = 0, gisstride;
3104 
3105   PetscFunctionBegin;
3106   /* check if we are grabbing all columns */
3107   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3108 
3109   if (isstride) {
3110     PetscInt start, len, mstart, mlen;
3111     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3112     PetscCall(ISGetLocalSize(iscol, &len));
3113     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3114     if (mstart == start && mlen - mstart == len) lisstride = 1;
3115   }
3116 
3117   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3118   if (gisstride) {
3119     PetscInt N;
3120     PetscCall(MatGetSize(mat, NULL, &N));
3121     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3122     PetscCall(ISSetIdentity(iscol_local));
3123     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3124   } else {
3125     PetscInt cbs;
3126     PetscCall(ISGetBlockSize(iscol, &cbs));
3127     PetscCall(ISAllGather(iscol, &iscol_local));
3128     PetscCall(ISSetBlockSize(iscol_local, cbs));
3129   }
3130 
3131   *isseq = iscol_local;
3132   PetscFunctionReturn(PETSC_SUCCESS);
3133 }
3134 
3135 /*
3136  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of the global size of iscol
3137  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3138 
3139  Input Parameters:
3140 +   mat - matrix
3141 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3142            i.e., mat->rstart <= isrow[i] < mat->rend
3143 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3144            i.e., mat->cstart <= iscol[i] < mat->cend
3145 
3146  Output Parameters:
3147 +   isrow_d - sequential row index set for retrieving mat->A
3148 .   iscol_d - sequential column index set for retrieving mat->A
3149 .   iscol_o - sequential column index set for retrieving mat->B
3150 -   garray - column map; garray[i] gives the position of column iscol_o[i] within the (concatenated) `iscol`
3151  */
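/*
   Worked illustration (hypothetical data, not from the original source): let mat have N = 8 columns
   split as [0,4) on rank 0 and [4,8) on rank 1, let iscol select {1} on rank 0 and {5,6} on rank 1
   (so iscol = {1,5,6} globally), and let rank 0's B have compressed columns with a->garray = {4,5,6}.
   Then on rank 0: iscol_d = {1} (a local column of mat->A), iscol_o = {1,2} (local columns of mat->B,
   since global columns 5 and 6 are selected but 4 is not), and the output garray = {1,2}, the
   positions of global columns 5 and 6 within iscol.
*/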
3152 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3153 {
3154   Vec             x, cmap;
3155   const PetscInt *is_idx;
3156   PetscScalar    *xarray, *cmaparray;
3157   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3158   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3159   Mat             B    = a->B;
3160   Vec             lvec = a->lvec, lcmap;
3161   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3162   MPI_Comm        comm;
3163   VecScatter      Mvctx = a->Mvctx;
3164 
3165   PetscFunctionBegin;
3166   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3167   PetscCall(ISGetLocalSize(iscol, &ncols));
3168 
3169   /* (1) iscol selects a subset of the columns of mat; embed it in a full-length vector x, using -1.0 to mark unselected columns */
3170   PetscCall(MatCreateVecs(mat, &x, NULL));
3171   PetscCall(VecSet(x, -1.0));
3172   PetscCall(VecDuplicate(x, &cmap));
3173   PetscCall(VecSet(cmap, -1.0));
3174 
3175   /* Get start indices */
3176   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3177   isstart -= ncols;
3178   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3179 
3180   PetscCall(ISGetIndices(iscol, &is_idx));
3181   PetscCall(VecGetArray(x, &xarray));
3182   PetscCall(VecGetArray(cmap, &cmaparray));
3183   PetscCall(PetscMalloc1(ncols, &idx));
3184   for (i = 0; i < ncols; i++) {
3185     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3186     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3187     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3188   }
3189   PetscCall(VecRestoreArray(x, &xarray));
3190   PetscCall(VecRestoreArray(cmap, &cmaparray));
3191   PetscCall(ISRestoreIndices(iscol, &is_idx));
3192 
3193   /* Get iscol_d */
3194   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3195   PetscCall(ISGetBlockSize(iscol, &i));
3196   PetscCall(ISSetBlockSize(*iscol_d, i));
3197 
3198   /* Get isrow_d */
3199   PetscCall(ISGetLocalSize(isrow, &m));
3200   rstart = mat->rmap->rstart;
3201   PetscCall(PetscMalloc1(m, &idx));
3202   PetscCall(ISGetIndices(isrow, &is_idx));
3203   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3204   PetscCall(ISRestoreIndices(isrow, &is_idx));
3205 
3206   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3207   PetscCall(ISGetBlockSize(isrow, &i));
3208   PetscCall(ISSetBlockSize(*isrow_d, i));
3209 
3210   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3211   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3212   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3213 
3214   PetscCall(VecDuplicate(lvec, &lcmap));
3215 
3216   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3217   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3218 
3219   /* (3) create sequential iscol_o (a subset of iscol) and the output garray */
3220   /* off-process column indices */
3221   count = 0;
3222   PetscCall(PetscMalloc1(Bn, &idx));
3223   PetscCall(PetscMalloc1(Bn, &cmap1));
3224 
3225   PetscCall(VecGetArray(lvec, &xarray));
3226   PetscCall(VecGetArray(lcmap, &cmaparray));
3227   for (i = 0; i < Bn; i++) {
3228     if (PetscRealPart(xarray[i]) > -1.0) {
3229       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3230       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3231       count++;
3232     }
3233   }
3234   PetscCall(VecRestoreArray(lvec, &xarray));
3235   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3236 
3237   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3238   /* cannot ensure iscol_o has same blocksize as iscol! */
3239 
3240   PetscCall(PetscFree(idx));
3241   *garray = cmap1;
3242 
3243   PetscCall(VecDestroy(&x));
3244   PetscCall(VecDestroy(&cmap));
3245   PetscCall(VecDestroy(&lcmap));
3246   PetscFunctionReturn(PETSC_SUCCESS);
3247 }
3248 
3249 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3250 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3251 {
3252   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3253   Mat         M = NULL;
3254   MPI_Comm    comm;
3255   IS          iscol_d, isrow_d, iscol_o;
3256   Mat         Asub = NULL, Bsub = NULL;
3257   PetscInt    n;
3258 
3259   PetscFunctionBegin;
3260   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3261 
3262   if (call == MAT_REUSE_MATRIX) {
3263     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3264     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3265     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3266 
3267     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3268     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3269 
3270     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3271     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3272 
3273     /* Update diagonal and off-diagonal portions of submat */
3274     asub = (Mat_MPIAIJ *)(*submat)->data;
3275     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3276     PetscCall(ISGetLocalSize(iscol_o, &n));
3277     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3278     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3279     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3280 
3281   } else { /* call == MAT_INITIAL_MATRIX */
3282     const PetscInt *garray;
3283     PetscInt        BsubN;
3284 
3285     /* Create isrow_d, iscol_d, iscol_o and garray */
3286     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3287 
3288     /* Create local submatrices Asub and Bsub */
3289     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3290     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3291 
3292     /* Create submatrix M */
3293     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3294 
3295     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3296     asub = (Mat_MPIAIJ *)M->data;
3297 
3298     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3299     n = asub->B->cmap->N;
3300     if (BsubN > n) {
3301       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3302       const PetscInt *idx;
3303       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3304       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3305 
3306       PetscCall(PetscMalloc1(n, &idx_new));
3307       j = 0;
3308       PetscCall(ISGetIndices(iscol_o, &idx));
3309       for (i = 0; i < n; i++) {
3310         if (j >= BsubN) break;
3311         while (subgarray[i] > garray[j]) j++;
3312 
3313         if (subgarray[i] == garray[j]) {
3314           idx_new[i] = idx[j++];
3315         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3316       }
3317       PetscCall(ISRestoreIndices(iscol_o, &idx));
3318 
3319       PetscCall(ISDestroy(&iscol_o));
3320       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3321 
3322     } else if (BsubN < n) {
3323       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3324     }
3325 
3326     PetscCall(PetscFree(garray));
3327     *submat = M;
3328 
3329     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3330     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3331     PetscCall(ISDestroy(&isrow_d));
3332 
3333     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3334     PetscCall(ISDestroy(&iscol_d));
3335 
3336     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3337     PetscCall(ISDestroy(&iscol_o));
3338   }
3339   PetscFunctionReturn(PETSC_SUCCESS);
3340 }
3341 
3342 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3343 {
3344   IS        iscol_local = NULL, isrow_d;
3345   PetscInt  csize;
3346   PetscInt  n, i, j, start, end;
3347   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3348   MPI_Comm  comm;
3349 
3350   PetscFunctionBegin;
3351   /* If isrow has the same processor distribution as mat,
3352      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3353   if (call == MAT_REUSE_MATRIX) {
3354     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3355     if (isrow_d) {
3356       sameRowDist  = PETSC_TRUE;
3357       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3358     } else {
3359       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3360       if (iscol_local) {
3361         sameRowDist  = PETSC_TRUE;
3362         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3363       }
3364     }
3365   } else {
3366     /* Check if isrow has same processor distribution as mat */
3367     sameDist[0] = PETSC_FALSE;
3368     PetscCall(ISGetLocalSize(isrow, &n));
3369     if (!n) {
3370       sameDist[0] = PETSC_TRUE;
3371     } else {
3372       PetscCall(ISGetMinMax(isrow, &i, &j));
3373       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3374       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3375     }
3376 
3377     /* Check if iscol has same processor distribution as mat */
3378     sameDist[1] = PETSC_FALSE;
3379     PetscCall(ISGetLocalSize(iscol, &n));
3380     if (!n) {
3381       sameDist[1] = PETSC_TRUE;
3382     } else {
3383       PetscCall(ISGetMinMax(iscol, &i, &j));
3384       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3385       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3386     }
3387 
3388     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3389     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3390     sameRowDist = tsameDist[0];
3391   }
3392 
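  /* Dispatch: same row and column distribution -> MatCreateSubMatrix_MPIAIJ_SameRowColDist() (no gather needed);
     same row distribution only -> MatCreateSubMatrix_MPIAIJ_SameRowDist() (when the gathered iscol_local is sorted);
     otherwise -> MatCreateSubMatrix_MPIAIJ_nonscalable() with the gathered iscol_local */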
3393   if (sameRowDist) {
3394     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3395       /* isrow and iscol have same processor distribution as mat */
3396       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3397       PetscFunctionReturn(PETSC_SUCCESS);
3398     } else { /* sameRowDist */
3399       /* isrow has same processor distribution as mat */
3400       if (call == MAT_INITIAL_MATRIX) {
3401         PetscBool sorted;
3402         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3403         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3404         PetscCall(ISGetSize(iscol, &i));
3405         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3406 
3407         PetscCall(ISSorted(iscol_local, &sorted));
3408         if (sorted) {
3409           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3410           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3411           PetscFunctionReturn(PETSC_SUCCESS);
3412         }
3413       } else { /* call == MAT_REUSE_MATRIX */
3414         IS iscol_sub;
3415         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3416         if (iscol_sub) {
3417           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3418           PetscFunctionReturn(PETSC_SUCCESS);
3419         }
3420       }
3421     }
3422   }
3423 
3424   /* General case: iscol -> iscol_local which has global size of iscol */
3425   if (call == MAT_REUSE_MATRIX) {
3426     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3427     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3428   } else {
3429     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3430   }
3431 
3432   PetscCall(ISGetLocalSize(iscol, &csize));
3433   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3434 
3435   if (call == MAT_INITIAL_MATRIX) {
3436     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3437     PetscCall(ISDestroy(&iscol_local));
3438   }
3439   PetscFunctionReturn(PETSC_SUCCESS);
3440 }
3441 
3442 /*@C
3443   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3444   and "off-diagonal" part of the matrix in CSR format.
3445 
3446   Collective
3447 
3448   Input Parameters:
3449 + comm   - MPI communicator
3450 . A      - "diagonal" portion of matrix
3451 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3452 - garray - global index of `B` columns
3453 
3454   Output Parameter:
3455 . mat - the matrix, with input `A` as its local diagonal matrix
3456 
3457   Level: advanced
3458 
3459   Notes:
3460   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3461 
3462   `A` becomes part of the output `mat` and `B` is destroyed by this routine; the user must not use `A` or `B` afterwards.
3463 
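  Example Usage:
  A minimal sketch, not part of the original manual page; `Aloc`, `Bloc`, and `garray` are hypothetical and must have the shapes described above
.vb
  Mat C;

  /* Aloc: m x n MATSEQAIJ holding this rank's "diagonal" block                      */
  /* Bloc: m x k MATSEQAIJ holding the "off-diagonal" block with compressed columns, */
  /*       where garray[0..k-1] is the global column index of each compressed column */
  PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Aloc, Bloc, garray, &C));
  /* Aloc and Bloc now belong to C and must not be used again */
.ve
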
3464 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3465 @*/
3466 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3467 {
3468   Mat_MPIAIJ        *maij;
3469   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3470   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3471   const PetscScalar *oa;
3472   Mat                Bnew;
3473   PetscInt           m, n, N;
3474   MatType            mpi_mat_type;
3475 
3476   PetscFunctionBegin;
3477   PetscCall(MatCreate(comm, mat));
3478   PetscCall(MatGetSize(A, &m, &n));
3479   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3480   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3481   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its block size may not be the same as A's */
3482   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3483 
3484   /* Get global columns of mat */
3485   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3486 
3487   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3488   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3489   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3490   PetscCall(MatSetType(*mat, mpi_mat_type));
3491 
3492   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3493   maij = (Mat_MPIAIJ *)(*mat)->data;
3494 
3495   (*mat)->preallocated = PETSC_TRUE;
3496 
3497   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3498   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3499 
3500   /* Set A as diagonal portion of *mat */
3501   maij->A = A;
3502 
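  /* rewrite B's compressed (local) column indices as global column indices, in place, using garray */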
3503   nz = oi[m];
3504   for (i = 0; i < nz; i++) {
3505     col   = oj[i];
3506     oj[i] = garray[col];
3507   }
3508 
3509   /* Set Bnew as off-diagonal portion of *mat */
3510   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3511   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3512   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3513   bnew        = (Mat_SeqAIJ *)Bnew->data;
3514   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3515   maij->B     = Bnew;
3516 
3517   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3518 
3519   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3520   b->free_a       = PETSC_FALSE;
3521   b->free_ij      = PETSC_FALSE;
3522   PetscCall(MatDestroy(&B));
3523 
3524   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3525   bnew->free_a       = PETSC_TRUE;
3526   bnew->free_ij      = PETSC_TRUE;
3527 
3528   /* condense columns of maij->B */
3529   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3530   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3531   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3532   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3533   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3534   PetscFunctionReturn(PETSC_SUCCESS);
3535 }
3536 
3537 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3538 
3539 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3540 {
3541   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3542   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3543   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3544   Mat             M, Msub, B = a->B;
3545   MatScalar      *aa;
3546   Mat_SeqAIJ     *aij;
3547   PetscInt       *garray = a->garray, *colsub, Ncols;
3548   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3549   IS              iscol_sub, iscmap;
3550   const PetscInt *is_idx, *cmap;
3551   PetscBool       allcolumns = PETSC_FALSE;
3552   MPI_Comm        comm;
3553 
3554   PetscFunctionBegin;
3555   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3556   if (call == MAT_REUSE_MATRIX) {
3557     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3558     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3559     PetscCall(ISGetLocalSize(iscol_sub, &count));
3560 
3561     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3562     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3563 
3564     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3565     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3566 
3567     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3568 
3569   } else { /* call == MAT_INITIAL_MATRIX */
3570     PetscBool flg;
3571 
3572     PetscCall(ISGetLocalSize(iscol, &n));
3573     PetscCall(ISGetSize(iscol, &Ncols));
3574 
3575     /* (1) iscol -> nonscalable iscol_local */
3576     /* Check for special case: each processor gets entire matrix columns */
3577     PetscCall(ISIdentity(iscol_local, &flg));
3578     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3579     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3580     if (allcolumns) {
3581       iscol_sub = iscol_local;
3582       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3583       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3584 
3585     } else {
3586       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3587       PetscInt *idx, *cmap1, k;
3588       PetscCall(PetscMalloc1(Ncols, &idx));
3589       PetscCall(PetscMalloc1(Ncols, &cmap1));
3590       PetscCall(ISGetIndices(iscol_local, &is_idx));
3591       count = 0;
3592       k     = 0;
3593       for (i = 0; i < Ncols; i++) {
3594         j = is_idx[i];
3595         if (j >= cstart && j < cend) {
3596           /* diagonal part of mat */
3597           idx[count]     = j;
3598           cmap1[count++] = i; /* column index in submat */
3599         } else if (Bn) {
3600           /* off-diagonal part of mat */
3601           if (j == garray[k]) {
3602             idx[count]     = j;
3603             cmap1[count++] = i; /* column index in submat */
3604           } else if (j > garray[k]) {
3605             while (j > garray[k] && k < Bn - 1) k++;
3606             if (j == garray[k]) {
3607               idx[count]     = j;
3608               cmap1[count++] = i; /* column index in submat */
3609             }
3610           }
3611         }
3612       }
3613       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3614 
3615       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3616       PetscCall(ISGetBlockSize(iscol, &cbs));
3617       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3618 
3619       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3620     }
3621 
3622     /* (3) Create sequential Msub */
3623     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3624   }
3625 
3626   PetscCall(ISGetLocalSize(iscol_sub, &count));
3627   aij = (Mat_SeqAIJ *)(Msub)->data;
3628   ii  = aij->i;
3629   PetscCall(ISGetIndices(iscmap, &cmap));
3630 
3631   /*
3632       m - number of local rows
3633       Ncols - number of columns (same on all processors)
3634       rstart - first row in new global matrix generated
3635   */
3636   PetscCall(MatGetSize(Msub, &m, NULL));
3637 
3638   if (call == MAT_INITIAL_MATRIX) {
3639     /* (4) Create parallel newmat */
3640     PetscMPIInt rank, size;
3641     PetscInt    csize;
3642 
3643     PetscCallMPI(MPI_Comm_size(comm, &size));
3644     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3645 
3646     /*
3647         Determine the number of non-zeros in the diagonal and off-diagonal
3648         portions of the matrix in order to do correct preallocation
3649     */
3650 
3651     /* first get start and end of "diagonal" columns */
3652     PetscCall(ISGetLocalSize(iscol, &csize));
3653     if (csize == PETSC_DECIDE) {
3654       PetscCall(ISGetSize(isrow, &mglobal));
3655       if (mglobal == Ncols) { /* square matrix */
3656         nlocal = m;
3657       } else {
3658         nlocal = Ncols / size + ((Ncols % size) > rank);
3659       }
3660     } else {
3661       nlocal = csize;
3662     }
3663     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3664     rstart = rend - nlocal;
3665     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3666 
3667     /* next, compute all the lengths */
3668     jj = aij->j;
3669     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3670     olens = dlens + m;
3671     for (i = 0; i < m; i++) {
3672       jend = ii[i + 1] - ii[i];
3673       olen = 0;
3674       dlen = 0;
3675       for (j = 0; j < jend; j++) {
3676         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3677         else dlen++;
3678         jj++;
3679       }
3680       olens[i] = olen;
3681       dlens[i] = dlen;
3682     }
3683 
3684     PetscCall(ISGetBlockSize(isrow, &bs));
3685     PetscCall(ISGetBlockSize(iscol, &cbs));
3686 
3687     PetscCall(MatCreate(comm, &M));
3688     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3689     PetscCall(MatSetBlockSizes(M, bs, cbs));
3690     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3691     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3692     PetscCall(PetscFree(dlens));
3693 
3694   } else { /* call == MAT_REUSE_MATRIX */
3695     M = *newmat;
3696     PetscCall(MatGetLocalSize(M, &i, NULL));
3697     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3698     PetscCall(MatZeroEntries(M));
3699     /*
3700          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3701        rather than the slower MatSetValues().
3702     */
3703     M->was_assembled = PETSC_TRUE;
3704     M->assembled     = PETSC_FALSE;
3705   }
3706 
3707   /* (5) Set values of Msub to *newmat */
3708   PetscCall(PetscMalloc1(count, &colsub));
3709   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3710 
3711   jj = aij->j;
3712   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3713   for (i = 0; i < m; i++) {
3714     row = rstart + i;
3715     nz  = ii[i + 1] - ii[i];
3716     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3717     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3718     jj += nz;
3719     aa += nz;
3720   }
3721   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3722   PetscCall(ISRestoreIndices(iscmap, &cmap));
3723 
3724   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3725   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3726 
3727   PetscCall(PetscFree(colsub));
3728 
3729   /* save Msub, iscol_sub and iscmap used in processor for next request */
3730   if (call == MAT_INITIAL_MATRIX) {
3731     *newmat = M;
3732     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3733     PetscCall(MatDestroy(&Msub));
3734 
3735     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3736     PetscCall(ISDestroy(&iscol_sub));
3737 
3738     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3739     PetscCall(ISDestroy(&iscmap));
3740 
3741     if (iscol_local) {
3742       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3743       PetscCall(ISDestroy(&iscol_local));
3744     }
3745   }
3746   PetscFunctionReturn(PETSC_SUCCESS);
3747 }
3748 
3749 /*
3750     Not great since it makes two copies of the submatrix: first a SeqAIJ
3751   on each process, and then the end result by concatenating the local matrices.
3752   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3753 
3754   This requires a sequential iscol containing all of the column indices.
3755 */
3756 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3757 {
3758   PetscMPIInt rank, size;
3759   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3760   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3761   Mat         M, Mreuse;
3762   MatScalar  *aa, *vwork;
3763   MPI_Comm    comm;
3764   Mat_SeqAIJ *aij;
3765   PetscBool   colflag, allcolumns = PETSC_FALSE;
3766 
3767   PetscFunctionBegin;
3768   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3769   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3770   PetscCallMPI(MPI_Comm_size(comm, &size));
3771 
3772   /* Check for special case: each processor gets entire matrix columns */
3773   PetscCall(ISIdentity(iscol, &colflag));
3774   PetscCall(ISGetLocalSize(iscol, &n));
3775   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3776   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3777 
3778   if (call == MAT_REUSE_MATRIX) {
3779     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3780     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3781     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3782   } else {
3783     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3784   }
3785 
3786   /*
3787       m - number of local rows
3788       n - number of columns (same on all processors)
3789       rstart - first row in new global matrix generated
3790   */
3791   PetscCall(MatGetSize(Mreuse, &m, &n));
3792   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3793   if (call == MAT_INITIAL_MATRIX) {
3794     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3795     ii  = aij->i;
3796     jj  = aij->j;
3797 
3798     /*
3799         Determine the number of non-zeros in the diagonal and off-diagonal
3800         portions of the matrix in order to do correct preallocation
3801     */
3802 
3803     /* first get start and end of "diagonal" columns */
3804     if (csize == PETSC_DECIDE) {
3805       PetscCall(ISGetSize(isrow, &mglobal));
3806       if (mglobal == n) { /* square matrix */
3807         nlocal = m;
3808       } else {
3809         nlocal = n / size + ((n % size) > rank);
3810       }
3811     } else {
3812       nlocal = csize;
3813     }
3814     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3815     rstart = rend - nlocal;
3816     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3817 
3818     /* next, compute all the lengths */
3819     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3820     olens = dlens + m;
3821     for (i = 0; i < m; i++) {
3822       jend = ii[i + 1] - ii[i];
3823       olen = 0;
3824       dlen = 0;
3825       for (j = 0; j < jend; j++) {
3826         if (*jj < rstart || *jj >= rend) olen++;
3827         else dlen++;
3828         jj++;
3829       }
3830       olens[i] = olen;
3831       dlens[i] = dlen;
3832     }
3833     PetscCall(MatCreate(comm, &M));
3834     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3835     PetscCall(MatSetBlockSizes(M, bs, cbs));
3836     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3837     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3838     PetscCall(PetscFree(dlens));
3839   } else {
3840     PetscInt ml, nl;
3841 
3842     M = *newmat;
3843     PetscCall(MatGetLocalSize(M, &ml, &nl));
3844     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3845     PetscCall(MatZeroEntries(M));
3846     /*
3847          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3848        rather than the slower MatSetValues().
3849     */
3850     M->was_assembled = PETSC_TRUE;
3851     M->assembled     = PETSC_FALSE;
3852   }
3853   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3854   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3855   ii  = aij->i;
3856   jj  = aij->j;
3857 
3858   /* trigger copy to CPU if needed */
3859   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3860   for (i = 0; i < m; i++) {
3861     row   = rstart + i;
3862     nz    = ii[i + 1] - ii[i];
3863     cwork = jj;
3864     jj    = PetscSafePointerPlusOffset(jj, nz);
3865     vwork = aa;
3866     aa    = PetscSafePointerPlusOffset(aa, nz);
3867     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3868   }
3869   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3870 
3871   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3872   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3873   *newmat = M;
3874 
3875   /* save submatrix used by this process for the next (reuse) request */
3876   if (call == MAT_INITIAL_MATRIX) {
3877     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3878     PetscCall(MatDestroy(&Mreuse));
3879   }
3880   PetscFunctionReturn(PETSC_SUCCESS);
3881 }
3882 
3883 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3884 {
3885   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3886   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3887   const PetscInt *JJ;
3888   PetscBool       nooffprocentries;
3889   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3890 
3891   PetscFunctionBegin;
3892   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3893 
3894   PetscCall(PetscLayoutSetUp(B->rmap));
3895   PetscCall(PetscLayoutSetUp(B->cmap));
3896   m      = B->rmap->n;
3897   cstart = B->cmap->rstart;
3898   cend   = B->cmap->rend;
3899   rstart = B->rmap->rstart;
3900 
3901   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3902 
3903   if (PetscDefined(USE_DEBUG)) {
3904     for (i = 0; i < m; i++) {
3905       nnz = Ii[i + 1] - Ii[i];
3906       JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
3907       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3908       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3909       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3910     }
3911   }
3912 
3913   for (i = 0; i < m; i++) {
3914     nnz     = Ii[i + 1] - Ii[i];
3915     JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
3916     nnz_max = PetscMax(nnz_max, nnz);
3917     d       = 0;
3918     for (j = 0; j < nnz; j++) {
3919       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3920     }
3921     d_nnz[i] = d;
3922     o_nnz[i] = nnz - d;
3923   }
3924   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3925   PetscCall(PetscFree2(d_nnz, o_nnz));
3926 
3927   for (i = 0; i < m; i++) {
3928     ii = i + rstart;
3929     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
3930   }
3931   nooffprocentries    = B->nooffprocentries;
3932   B->nooffprocentries = PETSC_TRUE;
3933   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3934   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3935   B->nooffprocentries = nooffprocentries;
3936 
3937   /* count number of entries below block diagonal */
3938   PetscCall(PetscFree(Aij->ld));
3939   PetscCall(PetscCalloc1(m, &ld));
3940   Aij->ld = ld;
3941   for (i = 0; i < m; i++) {
3942     nnz = Ii[i + 1] - Ii[i];
3943     j   = 0;
3944     while (j < nnz && J[j] < cstart) j++;
3945     ld[i] = j;
3946     if (J) J += nnz;
3947   }
3948 
3949   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3950   PetscFunctionReturn(PETSC_SUCCESS);
3951 }
3952 
3953 /*@
3954   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3955   (the default parallel PETSc format).
3956 
3957   Collective
3958 
3959   Input Parameters:
3960 + B - the matrix
3961 . i - the indices into `j` for the start of each local row (indices start with zero)
3962 . j - the column indices for each local row (indices start with zero)
3963 - v - optional values in the matrix
3964 
3965   Level: developer
3966 
3967   Notes:
3968   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3969   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3970   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3971 
3972   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3973 
3974   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3975 
3976   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3977 
3978   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3979   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3980 
3981   The format used for the sparse matrix input is equivalent to a
3982   row-major ordering, i.e., for the following matrix, the input data expected is
3983   as shown
3984 .vb
3985         1 0 0
3986         2 0 3     P0
3987        -------
3988         4 5 6     P1
3989 
3990      Process0 [P0] rows_owned=[0,1]
3991         i =  {0,1,3}  [size = nrow+1  = 2+1]
3992         j =  {0,0,2}  [size = 3]
3993         v =  {1,2,3}  [size = 3]
3994 
3995      Process1 [P1] rows_owned=[2]
3996         i =  {0,3}    [size = nrow+1  = 1+1]
3997         j =  {0,1,2}  [size = 3]
3998         v =  {4,5,6}  [size = 3]
3999 .ve
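
  As a sketch, rank 0 of the two-process example above could build the matrix as follows
  (illustrative only; `comm` and the `i`, `j`, `v` arrays listed above are assumed to be
  already set up, and error checking via `PetscCall()` is omitted)
.vb
  Mat B;

  MatCreate(comm, &B);
  MatSetSizes(B, 2, PETSC_DECIDE, 3, 3); // this rank owns rows 0 and 1 of the 3x3 matrix
  MatSetType(B, MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(B, i, j, v); // i, j, v as listed above for P0
.ve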
4000 
4001 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4002           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4003 @*/
4004 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4005 {
4006   PetscFunctionBegin;
4007   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4008   PetscFunctionReturn(PETSC_SUCCESS);
4009 }
4010 
4011 /*@C
4012   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4013   (the default parallel PETSc format).  For good matrix assembly performance
4014   the user should preallocate the matrix storage by setting the parameters
4015   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4016 
4017   Collective
4018 
4019   Input Parameters:
4020 + B     - the matrix
4021 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4022            (same value is used for all local rows)
4023 . d_nnz - array containing the number of nonzeros in the various rows of the
4024            DIAGONAL portion of the local submatrix (possibly different for each row)
4025            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4026            The size of this array is equal to the number of local rows, i.e 'm'.
4027            For matrices that will be factored, you must leave room for (and set)
4028            the diagonal entry even if it is zero.
4029 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4030            submatrix (same value is used for all local rows).
4031 - o_nnz - array containing the number of nonzeros in the various rows of the
4032            OFF-DIAGONAL portion of the local submatrix (possibly different for
4033            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4034            structure. The size of this array is equal to the number
4035            of local rows, i.e 'm'.
4036 
4037   Example Usage:
4038   Consider the following 8x8 matrix with 34 non-zero values, that is
4039   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4040   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4041   as follows
4042 
4043 .vb
4044             1  2  0  |  0  3  0  |  0  4
4045     Proc0   0  5  6  |  7  0  0  |  8  0
4046             9  0 10  | 11  0  0  | 12  0
4047     -------------------------------------
4048            13  0 14  | 15 16 17  |  0  0
4049     Proc1   0 18  0  | 19 20 21  |  0  0
4050             0  0  0  | 22 23  0  | 24  0
4051     -------------------------------------
4052     Proc2  25 26 27  |  0  0 28  | 29  0
4053            30  0  0  | 31 32 33  |  0 34
4054 .ve
4055 
4056   This can be represented as a collection of submatrices as
4057 .vb
4058       A B C
4059       D E F
4060       G H I
4061 .ve
4062 
4063   Here the submatrices A,B,C are owned by proc0, D,E,F are
4064   owned by proc1, and G,H,I are owned by proc2.
4065 
4066   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4067   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4068   The 'M','N' parameters are 8,8, and have the same values on all procs.
4069 
4070   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4071   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4072   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4073   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4074   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4075   matrix, and [DF] as another `MATSEQAIJ` matrix.
4076 
4077   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4078   allocated for every row of the local diagonal submatrix, and `o_nz`
4079   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4080   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros
4081   in any local row of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4082   In this case, the values of `d_nz`, `o_nz` are
4083 .vb
4084      proc0  dnz = 2, o_nz = 2
4085      proc1  dnz = 3, o_nz = 2
4086      proc2  dnz = 1, o_nz = 4
4087 .ve
4088   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4089   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4090   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4091   34 values.
4092 
4093   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4094   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4095   In the above case the values for `d_nnz`, `o_nnz` are
4096 .vb
4097      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4098      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4099      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4100 .ve
4101   Here the space allocated is the sum of all the above values, i.e., 34, and
4102   hence pre-allocation is perfect.
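
  A sketch of the corresponding call on proc1 (the values are copied from the table above;
  creating and sizing `B` is omitted)
.vb
  PetscInt d_nnz[] = {3, 3, 2}, o_nnz[] = {2, 1, 1};

  MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz); // d_nz and o_nz are ignored when the arrays are given
.ve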
4103 
4104   Level: intermediate
4105 
4106   Notes:
4107   If the *_nnz parameter is given then the *_nz parameter is ignored
4108 
4109   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4110   storage.  The stored row and column indices begin with zero.
4111   See [Sparse Matrices](sec_matsparse) for details.
4112 
4113   The parallel matrix is partitioned such that the first m0 rows belong to
4114   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4115   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4116 
4117   The DIAGONAL portion of the local submatrix of a processor can be defined
4118   as the submatrix obtained by extracting the part corresponding to
4119   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4120   first row that belongs to the processor, r2 is the last row belonging to
4121   this processor, and c1-c2 is the range of indices of the local part of a
4122   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4123   common case of a square matrix, the row and column ranges are the same and
4124   the DIAGONAL part is also square. The remaining portion of the local
4125   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4126 
4127   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4128 
4129   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4130   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4131   You can also run with the option `-info` and look for messages with the string
4132   malloc in them to see if additional memory allocation was needed.
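
  For example, a minimal sketch of such a check (the `MatInfo` fields are `PetscLogDouble`,
  hence the %g format; error checking omitted)
.vb
  MatInfo info;

  MatGetInfo(A, MAT_LOCAL, &info);
  PetscPrintf(PETSC_COMM_SELF, "mallocs %g nz_allocated %g nz_used %g nz_unneeded %g\n", info.mallocs, info.nz_allocated, info.nz_used, info.nz_unneeded);
.ve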
4133 
4134 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4135           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4136 @*/
4137 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4138 {
4139   PetscFunctionBegin;
4140   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4141   PetscValidType(B, 1);
4142   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4143   PetscFunctionReturn(PETSC_SUCCESS);
4144 }
4145 
4146 /*@
4147   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4148   CSR format.
4149 
4150   Collective
4151 
4152   Input Parameters:
4153 + comm - MPI communicator
4154 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4155 . n    - This value should be the same as the local size used in creating the
4156          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
4157          calculated if `N` is given). For square matrices `n` is almost always `m`.
4158 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4159 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4160 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4161 . j    - global column indices
4162 - a    - optional matrix values
4163 
4164   Output Parameter:
4165 . mat - the matrix
4166 
4167   Level: intermediate
4168 
4169   Notes:
4170   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4171   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4172   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4173 
4174   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4175 
4176   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4177 
4178   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4179   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4180 
4181   The format which is used for the sparse matrix input, is equivalent to a
4182   row-major ordering, i.e., for the following matrix, the input data expected is
4183   as shown
4184 .vb
4185         1 0 0
4186         2 0 3     P0
4187        -------
4188         4 5 6     P1
4189 
4190      Process0 [P0] rows_owned=[0,1]
4191         i =  {0,1,3}  [size = nrow+1  = 2+1]
4192         j =  {0,0,2}  [size = 3]
4193         v =  {1,2,3}  [size = 3]
4194 
4195      Process1 [P1] rows_owned=[2]
4196         i =  {0,3}    [size = nrow+1  = 1+1]
4197         j =  {0,1,2}  [size = 3]
4198         v =  {4,5,6}  [size = 3]
4199 .ve
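
  A sketch of the call on process 0 of the example above (arrays hard-coded for illustration;
  error checking omitted)
.vb
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 2};
  PetscScalar v[] = {1, 2, 3};
  Mat         A;

  MatCreateMPIAIJWithArrays(comm, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, i, j, v, &A);
.ve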
4200 
4201 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4202           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4203 @*/
4204 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4205 {
4206   PetscFunctionBegin;
4207   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4208   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4209   PetscCall(MatCreate(comm, mat));
4210   PetscCall(MatSetSizes(*mat, m, n, M, N));
4211   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4212   PetscCall(MatSetType(*mat, MATMPIAIJ));
4213   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4214   PetscFunctionReturn(PETSC_SUCCESS);
4215 }
4216 
4217 /*@
4218   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4219   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4220   to `MatCreateMPIAIJWithArrays()`
4221 
4222   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4223 
4224   Collective
4225 
4226   Input Parameters:
4227 + mat - the matrix
4228 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4229 . n   - This value should be the same as the local size used in creating the
4230        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4231        calculated if N is given). For square matrices n is almost always m.
4232 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4233 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4234 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4235 . J   - column indices
4236 - v   - matrix values
4237 
4238   Level: deprecated
4239 
4240 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4241           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4242 @*/
4243 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4244 {
4245   PetscInt        nnz, i;
4246   PetscBool       nooffprocentries;
4247   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4248   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4249   PetscScalar    *ad, *ao;
4250   PetscInt        ldi, Iii, md;
4251   const PetscInt *Adi = Ad->i;
4252   PetscInt       *ld  = Aij->ld;
4253 
4254   PetscFunctionBegin;
4255   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4256   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4257   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4258   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4259 
4260   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4261   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4262 
4263   for (i = 0; i < m; i++) {
4264     if (PetscDefined(USE_DEBUG)) {
4265       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4266         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4267         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4268       }
4269     }
4270     nnz = Ii[i + 1] - Ii[i];
4271     Iii = Ii[i];
4272     ldi = ld[i];
4273     md  = Adi[i + 1] - Adi[i];
4274     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4275     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4276     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4277     ad += md;
4278     ao += nnz - md;
4279   }
4280   nooffprocentries      = mat->nooffprocentries;
4281   mat->nooffprocentries = PETSC_TRUE;
4282   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4283   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4284   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4285   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4286   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4287   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4288   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4289   mat->nooffprocentries = nooffprocentries;
4290   PetscFunctionReturn(PETSC_SUCCESS);
4291 }
4292 
4293 /*@
4294   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4295 
4296   Collective
4297 
4298   Input Parameters:
4299 + mat - the matrix
4300 - v   - matrix values, stored by row
4301 
4302   Level: intermediate
4303 
4304   Notes:
4305   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4306 
4307   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
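
  A sketch of a repeated update (assumes `A` was created as described above; `vnew` is a
  hypothetical array holding the new local values in the same CSR row order)
.vb
  MatUpdateMPIAIJWithArray(A, vnew); // vnew: hypothetical new values; the nonzero structure is reused
.ve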
4308 
4309 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4310           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4311 @*/
4312 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4313 {
4314   PetscInt        nnz, i, m;
4315   PetscBool       nooffprocentries;
4316   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4317   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4318   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4319   PetscScalar    *ad, *ao;
4320   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4321   PetscInt        ldi, Iii, md;
4322   PetscInt       *ld = Aij->ld;
4323 
4324   PetscFunctionBegin;
4325   m = mat->rmap->n;
4326 
4327   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4328   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4329   Iii = 0;
4330   for (i = 0; i < m; i++) {
4331     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4332     ldi = ld[i];
4333     md  = Adi[i + 1] - Adi[i];
4334     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4335     ad += md;
4336     if (ao) {
4337       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4338       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4339       ao += nnz - md;
4340     }
4341     Iii += nnz;
4342   }
4343   nooffprocentries      = mat->nooffprocentries;
4344   mat->nooffprocentries = PETSC_TRUE;
4345   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4346   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4347   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4348   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4349   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4350   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4351   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4352   mat->nooffprocentries = nooffprocentries;
4353   PetscFunctionReturn(PETSC_SUCCESS);
4354 }
4355 
4356 /*@C
4357   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4358   (the default parallel PETSc format).  For good matrix assembly performance
4359   the user should preallocate the matrix storage by setting the parameters
4360   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4361 
4362   Collective
4363 
4364   Input Parameters:
4365 + comm  - MPI communicator
4366 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4367           This value should be the same as the local size used in creating the
4368           y vector for the matrix-vector product y = Ax.
4369 . n     - This value should be the same as the local size used in creating the
4370           x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4371           calculated if N is given). For square matrices n is almost always m.
4372 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4373 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4374 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4375           (same value is used for all local rows)
4376 . d_nnz - array containing the number of nonzeros in the various rows of the
4377           DIAGONAL portion of the local submatrix (possibly different for each row)
4378           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4379           The size of this array is equal to the number of local rows, i.e 'm'.
4380 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4381           submatrix (same value is used for all local rows).
4382 - o_nnz - array containing the number of nonzeros in the various rows of the
4383           OFF-DIAGONAL portion of the local submatrix (possibly different for
4384           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4385           structure. The size of this array is equal to the number
4386           of local rows, i.e 'm'.
4387 
4388   Output Parameter:
4389 . A - the matrix
4390 
4391   Options Database Keys:
4392 + -mat_no_inode                     - Do not use inodes
4393 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4394 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4395                                       See viewer types in the manual page of `MatView()`. Of these, ascii_matlab, draw, or binary cause the `VecScatter`
4396                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4397 
4398   Level: intermediate
4399 
4400   Notes:
4401   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4402   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4403   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4404 
4405   If the *_nnz parameter is given then the *_nz parameter is ignored
4406 
4407   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4408   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4409   storage requirements for this matrix.
4410 
4411   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4412   processor then it must be used on all processors that share the object for
4413   that argument.
4414 
4415   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4416   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4417 
4418   The user MUST specify either the local or global matrix dimensions
4419   (possibly both).
4420 
4421   The parallel matrix is partitioned across processors such that the
4422   first `m0` rows belong to process 0, the next `m1` rows belong to
4423   process 1, the next `m2` rows belong to process 2, etc., where
4424   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4425   values corresponding to an [m x N] submatrix.
4426 
4427   The columns are logically partitioned with the n0 columns belonging
4428   to the 0th partition, the next n1 columns belonging to the next
4429   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4430 
4431   The DIAGONAL portion of the local submatrix on any given processor
4432   is the submatrix corresponding to the rows and columns m,n
4433   owned by the given processor, i.e., the diagonal matrix on
4434   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4435   etc. The remaining portion of the local submatrix [m x (N-n)]
4436   constitutes the OFF-DIAGONAL portion. The example below better
4437   illustrates this concept.
4438 
4439   For a square global matrix we define each processor's diagonal portion
4440   to be its local rows and the corresponding columns (a square submatrix);
4441   each processor's off-diagonal portion encompasses the remainder of the
4442   local matrix (a rectangular submatrix).
4443 
4444   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4445 
4446   When calling this routine with a single process communicator, a matrix of
4447   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4448   type of communicator, use the construction mechanism
4449 .vb
4450   MatCreate(..., &A);
4451   MatSetType(A, MATMPIAIJ);
4452   MatSetSizes(A, m, n, M, N);
4453   MatMPIAIJSetPreallocation(A, ...);
4454 .ve
4455 
4456   By default, this format uses inodes (identical nodes) when possible.
4457   We search for consecutive rows with the same nonzero structure, thereby
4458   reusing matrix information to achieve increased efficiency.
4459 
4460   Example Usage:
4461   Consider the following 8x8 matrix with 34 non-zero values, that is
4462   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4463   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4464   as follows
4465 
4466 .vb
4467             1  2  0  |  0  3  0  |  0  4
4468     Proc0   0  5  6  |  7  0  0  |  8  0
4469             9  0 10  | 11  0  0  | 12  0
4470     -------------------------------------
4471            13  0 14  | 15 16 17  |  0  0
4472     Proc1   0 18  0  | 19 20 21  |  0  0
4473             0  0  0  | 22 23  0  | 24  0
4474     -------------------------------------
4475     Proc2  25 26 27  |  0  0 28  | 29  0
4476            30  0  0  | 31 32 33  |  0 34
4477 .ve
4478 
4479   This can be represented as a collection of submatrices as
4480 
4481 .vb
4482       A B C
4483       D E F
4484       G H I
4485 .ve
4486 
4487   Here the submatrices A,B,C are owned by proc0, D,E,F are
4488   owned by proc1, and G,H,I are owned by proc2.
4489 
4490   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4491   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4492   The 'M','N' parameters are 8,8, and have the same values on all procs.
4493 
4494   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4495   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4496   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4497   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4498   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4499   matrix, and [DF] as another `MATSEQAIJ` matrix.
4500 
4501   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4502   allocated for every row of the local diagonal submatrix, and `o_nz`
4503   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4504   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros
4505   in any local row of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4506   In this case, the values of `d_nz`,`o_nz` are
4507 .vb
4508      proc0  dnz = 2, o_nz = 2
4509      proc1  dnz = 3, o_nz = 2
4510      proc2  dnz = 1, o_nz = 4
4511 .ve
4512   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4513   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4514   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4515   34 values.
4516 
4517   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4518   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4519   In the above case the values for d_nnz,o_nnz are
4520 .vb
4521      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4522      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4523      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4524 .ve
4525   Here the space allocated is the sum of all the above values, i.e., 34, and
4526   hence pre-allocation is perfect.
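
  As a sketch, proc0 above could create its share of the matrix with (values taken from the
  tables above; error checking omitted)
.vb
  PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2};
  Mat      A;

  MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve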
4527 
4528 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4529           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4530           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4531 @*/
4532 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4533 {
4534   PetscMPIInt size;
4535 
4536   PetscFunctionBegin;
4537   PetscCall(MatCreate(comm, A));
4538   PetscCall(MatSetSizes(*A, m, n, M, N));
4539   PetscCallMPI(MPI_Comm_size(comm, &size));
4540   if (size > 1) {
4541     PetscCall(MatSetType(*A, MATMPIAIJ));
4542     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4543   } else {
4544     PetscCall(MatSetType(*A, MATSEQAIJ));
4545     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4546   }
4547   PetscFunctionReturn(PETSC_SUCCESS);
4548 }
4549 
4550 /*MC
4551     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4552 
4553     Synopsis:
4554     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)
4555 
4556     Not Collective
4557 
4558     Input Parameter:
4559 .   A - the `MATMPIAIJ` matrix
4560 
4561     Output Parameters:
4562 +   Ad - the diagonal portion of the matrix
4563 .   Ao - the off-diagonal portion of the matrix
4564 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4565 -   ierr - error code
4566 
4567      Level: advanced
4568 
4569     Note:
4570     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4571 
4572 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4573 M*/
4574 
4575 /*MC
4576     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4577 
4578     Synopsis:
4579     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)
4580 
4581     Not Collective
4582 
4583     Input Parameters:
4584 +   A - the `MATMPIAIJ` matrix
4585 .   Ad - the diagonal portion of the matrix
4586 .   Ao - the off-diagonal portion of the matrix
4587 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4588 -   ierr - error code
4589 
4590      Level: advanced
4591 
4592 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4593 M*/
4594 
4595 /*@C
4596   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4597 
4598   Not Collective
4599 
4600   Input Parameter:
4601 . A - The `MATMPIAIJ` matrix
4602 
4603   Output Parameters:
4604 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4605 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4606 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4607 
4608   Level: intermediate
4609 
4610   Note:
4611   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4612   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4613   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4614   local column numbers to global column numbers in the original matrix.
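
  For example, a sketch that prints the global column of each nonzero in the first local row
  of `Ao` (assumes this process owns at least one row; error checking omitted)
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap, *cols;
  PetscInt        ncols;

  MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
  MatGetRow(Ao, 0, &ncols, &cols, NULL);
  for (PetscInt k = 0; k < ncols; k++) PetscPrintf(PETSC_COMM_SELF, "global column %" PetscInt_FMT "\n", colmap[cols[k]]);
  MatRestoreRow(Ao, 0, &ncols, &cols, NULL);
.ve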
4615 
4616   Fortran Notes:
4617   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4618 
4619 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4620 @*/
4621 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4622 {
4623   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4624   PetscBool   flg;
4625 
4626   PetscFunctionBegin;
4627   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4628   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4629   if (Ad) *Ad = a->A;
4630   if (Ao) *Ao = a->B;
4631   if (colmap) *colmap = a->garray;
4632   PetscFunctionReturn(PETSC_SUCCESS);
4633 }
4634 
4635 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4636 {
4637   PetscInt     m, N, i, rstart, nnz, Ii;
4638   PetscInt    *indx;
4639   PetscScalar *values;
4640   MatType      rootType;
4641 
4642   PetscFunctionBegin;
4643   PetscCall(MatGetSize(inmat, &m, &N));
4644   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4645     PetscInt *dnz, *onz, sum, bs, cbs;
4646 
4647     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4648     /* Check sum(n) = N */
4649     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4650     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4651 
4652     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4653     rstart -= m;
4654 
4655     MatPreallocateBegin(comm, m, n, dnz, onz);
4656     for (i = 0; i < m; i++) {
4657       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4658       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4659       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4660     }
4661 
4662     PetscCall(MatCreate(comm, outmat));
4663     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4664     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4665     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4666     PetscCall(MatGetRootType_Private(inmat, &rootType));
4667     PetscCall(MatSetType(*outmat, rootType));
4668     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4669     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4670     MatPreallocateEnd(dnz, onz);
4671     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4672   }
4673 
4674   /* numeric phase */
4675   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4676   for (i = 0; i < m; i++) {
4677     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4678     Ii = i + rstart;
4679     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4680     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4681   }
4682   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4683   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4684   PetscFunctionReturn(PETSC_SUCCESS);
4685 }
4686 
4687 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4688 {
4689   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4690 
4691   PetscFunctionBegin;
4692   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4693   PetscCall(PetscFree(merge->id_r));
4694   PetscCall(PetscFree(merge->len_s));
4695   PetscCall(PetscFree(merge->len_r));
4696   PetscCall(PetscFree(merge->bi));
4697   PetscCall(PetscFree(merge->bj));
4698   PetscCall(PetscFree(merge->buf_ri[0]));
4699   PetscCall(PetscFree(merge->buf_ri));
4700   PetscCall(PetscFree(merge->buf_rj[0]));
4701   PetscCall(PetscFree(merge->buf_rj));
4702   PetscCall(PetscFree(merge->coi));
4703   PetscCall(PetscFree(merge->coj));
4704   PetscCall(PetscFree(merge->owners_co));
4705   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4706   PetscCall(PetscFree(merge));
4707   PetscFunctionReturn(PETSC_SUCCESS);
4708 }
4709 
4710 #include <../src/mat/utils/freespace.h>
4711 #include <petscbt.h>
4712 
4713 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4714 {
4715   MPI_Comm             comm;
4716   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4717   PetscMPIInt          size, rank, taga, *len_s;
4718   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4719   PetscInt             proc, m;
4720   PetscInt           **buf_ri, **buf_rj;
4721   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4722   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4723   MPI_Request         *s_waits, *r_waits;
4724   MPI_Status          *status;
4725   const MatScalar     *aa, *a_a;
4726   MatScalar          **abuf_r, *ba_i;
4727   Mat_Merge_SeqsToMPI *merge;
4728   PetscContainer       container;
4729 
4730   PetscFunctionBegin;
4731   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4732   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4733 
4734   PetscCallMPI(MPI_Comm_size(comm, &size));
4735   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4736 
4737   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4738   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4739   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4740   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4741   aa = a_a;
4742 
4743   bi     = merge->bi;
4744   bj     = merge->bj;
4745   buf_ri = merge->buf_ri;
4746   buf_rj = merge->buf_rj;
4747 
4748   PetscCall(PetscMalloc1(size, &status));
4749   owners = merge->rowmap->range;
4750   len_s  = merge->len_s;
4751 
4752   /* send and recv matrix values */
4753   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4754   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4755 
4756   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4757   for (proc = 0, k = 0; proc < size; proc++) {
4758     if (!len_s[proc]) continue;
4759     i = owners[proc];
4760     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4761     k++;
4762   }
4763 
4764   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4765   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4766   PetscCall(PetscFree(status));
4767 
4768   PetscCall(PetscFree(s_waits));
4769   PetscCall(PetscFree(r_waits));
4770 
4771   /* insert mat values of mpimat */
4772   PetscCall(PetscMalloc1(N, &ba_i));
4773   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4774 
4775   for (k = 0; k < merge->nrecv; k++) {
4776     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4777     nrows       = *buf_ri_k[k];
4778     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4779     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4780   }
4781 
4782   /* set values of ba */
4783   m = merge->rowmap->n;
4784   for (i = 0; i < m; i++) {
4785     arow = owners[rank] + i;
4786     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4787     bnzi = bi[i + 1] - bi[i];
4788     PetscCall(PetscArrayzero(ba_i, bnzi));
4789 
4790     /* add local non-zero vals of this proc's seqmat into ba */
4791     anzi   = ai[arow + 1] - ai[arow];
4792     aj     = a->j + ai[arow];
4793     aa     = a_a + ai[arow];
4794     nextaj = 0;
4795     for (j = 0; nextaj < anzi; j++) {
4796       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4797         ba_i[j] += aa[nextaj++];
4798       }
4799     }
4800 
4801     /* add received vals into ba */
4802     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4803       /* i-th row */
4804       if (i == *nextrow[k]) {
4805         anzi   = *(nextai[k] + 1) - *nextai[k];
4806         aj     = buf_rj[k] + *nextai[k];
4807         aa     = abuf_r[k] + *nextai[k];
4808         nextaj = 0;
4809         for (j = 0; nextaj < anzi; j++) {
4810           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4811             ba_i[j] += aa[nextaj++];
4812           }
4813         }
4814         nextrow[k]++;
4815         nextai[k]++;
4816       }
4817     }
4818     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4819   }
4820   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4821   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4822   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4823 
4824   PetscCall(PetscFree(abuf_r[0]));
4825   PetscCall(PetscFree(abuf_r));
4826   PetscCall(PetscFree(ba_i));
4827   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4828   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4829   PetscFunctionReturn(PETSC_SUCCESS);
4830 }
4831 
4832 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4833 {
4834   Mat                  B_mpi;
4835   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4836   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4837   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4838   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4839   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4840   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4841   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4842   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4843   MPI_Status          *status;
4844   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4845   PetscBT              lnkbt;
4846   Mat_Merge_SeqsToMPI *merge;
4847   PetscContainer       container;
4848 
4849   PetscFunctionBegin;
4850   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4851 
4852   /* make sure it is a PETSc comm */
4853   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4854   PetscCallMPI(MPI_Comm_size(comm, &size));
4855   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4856 
4857   PetscCall(PetscNew(&merge));
4858   PetscCall(PetscMalloc1(size, &status));
4859 
4860   /* determine row ownership */
4861   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4862   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4863   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4864   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4865   PetscCall(PetscLayoutSetUp(merge->rowmap));
4866   PetscCall(PetscMalloc1(size, &len_si));
4867   PetscCall(PetscMalloc1(size, &merge->len_s));
4868 
4869   m      = merge->rowmap->n;
4870   owners = merge->rowmap->range;
4871 
4872   /* determine the number of messages to send, their lengths */
4873   len_s = merge->len_s;
4874 
4875   len          = 0; /* length of buf_si[] */
4876   merge->nsend = 0;
4877   for (proc = 0; proc < size; proc++) {
4878     len_si[proc] = 0;
4879     if (proc == rank) {
4880       len_s[proc] = 0;
4881     } else {
4882       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4883       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4884     }
4885     if (len_s[proc]) {
4886       merge->nsend++;
4887       nrows = 0;
4888       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4889         if (ai[i + 1] > ai[i]) nrows++;
4890       }
4891       len_si[proc] = 2 * (nrows + 1);
4892       len += len_si[proc];
4893     }
4894   }
4895 
4896   /* determine the number and length of messages to receive for ij-structure */
4897   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4898   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4899 
4900   /* post the Irecv of j-structure */
4901   PetscCall(PetscCommGetNewTag(comm, &tagj));
4902   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4903 
4904   /* post the Isend of j-structure */
4905   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4906 
4907   for (proc = 0, k = 0; proc < size; proc++) {
4908     if (!len_s[proc]) continue;
4909     i = owners[proc];
4910     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4911     k++;
4912   }
4913 
4914   /* receives and sends of j-structure are complete */
4915   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4916   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4917 
4918   /* send and recv i-structure */
4919   PetscCall(PetscCommGetNewTag(comm, &tagi));
4920   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4921 
4922   PetscCall(PetscMalloc1(len + 1, &buf_s));
4923   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4924   for (proc = 0, k = 0; proc < size; proc++) {
4925     if (!len_s[proc]) continue;
4926     /* form outgoing message for i-structure:
4927          buf_si[0]:                 nrows to be sent
4928                [1:nrows]:           row index (global)
4929                [nrows+1:2*nrows+1]: i-structure index
4930     */
4931     nrows       = len_si[proc] / 2 - 1;
4932     buf_si_i    = buf_si + nrows + 1;
4933     buf_si[0]   = nrows;
4934     buf_si_i[0] = 0;
4935     nrows       = 0;
4936     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4937       anzi = ai[i + 1] - ai[i];
4938       if (anzi) {
4939         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4940         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4941         nrows++;
4942       }
4943     }
4944     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4945     k++;
4946     buf_si += len_si[proc];
4947   }
4948 
4949   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4950   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4951 
4952   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4953   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4954 
4955   PetscCall(PetscFree(len_si));
4956   PetscCall(PetscFree(len_ri));
4957   PetscCall(PetscFree(rj_waits));
4958   PetscCall(PetscFree2(si_waits, sj_waits));
4959   PetscCall(PetscFree(ri_waits));
4960   PetscCall(PetscFree(buf_s));
4961   PetscCall(PetscFree(status));
4962 
4963   /* compute a local seq matrix on each process */
4964   /* allocate bi array and free space for accumulating nonzero column info */
4965   PetscCall(PetscMalloc1(m + 1, &bi));
4966   bi[0] = 0;
4967 
4968   /* create and initialize a linked list */
4969   nlnk = N + 1;
4970   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4971 
4972   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4973   len = ai[owners[rank + 1]] - ai[owners[rank]];
4974   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4975 
4976   current_space = free_space;
4977 
4978   /* determine symbolic info for each local row */
4979   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4980 
4981   for (k = 0; k < merge->nrecv; k++) {
4982     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4983     nrows       = *buf_ri_k[k];
4984     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4985     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4986   }
4987 
4988   MatPreallocateBegin(comm, m, n, dnz, onz);
4989   len = 0;
4990   for (i = 0; i < m; i++) {
4991     bnzi = 0;
4992     /* add local non-zero cols of this proc's seqmat into lnk */
4993     arow = owners[rank] + i;
4994     anzi = ai[arow + 1] - ai[arow];
4995     aj   = a->j + ai[arow];
4996     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4997     bnzi += nlnk;
4998     /* add received col data into lnk */
4999     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5000       if (i == *nextrow[k]) {            /* i-th row */
5001         anzi = *(nextai[k] + 1) - *nextai[k];
5002         aj   = buf_rj[k] + *nextai[k];
5003         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5004         bnzi += nlnk;
5005         nextrow[k]++;
5006         nextai[k]++;
5007       }
5008     }
5009     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5010 
5011     /* if free space is not available, make more free space */
5012     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5013     /* copy data into free space, then initialize lnk */
5014     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5015     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5016 
5017     current_space->array += bnzi;
5018     current_space->local_used += bnzi;
5019     current_space->local_remaining -= bnzi;
5020 
5021     bi[i + 1] = bi[i] + bnzi;
5022   }
5023 
5024   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5025 
5026   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5027   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5028   PetscCall(PetscLLDestroy(lnk, lnkbt));
5029 
5030   /* create symbolic parallel matrix B_mpi */
5031   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5032   PetscCall(MatCreate(comm, &B_mpi));
5033   if (n == PETSC_DECIDE) {
5034     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5035   } else {
5036     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5037   }
5038   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5039   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5040   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5041   MatPreallocateEnd(dnz, onz);
5042   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5043 
5044   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5045   B_mpi->assembled = PETSC_FALSE;
5046   merge->bi        = bi;
5047   merge->bj        = bj;
5048   merge->buf_ri    = buf_ri;
5049   merge->buf_rj    = buf_rj;
5050   merge->coi       = NULL;
5051   merge->coj       = NULL;
5052   merge->owners_co = NULL;
5053 
5054   PetscCall(PetscCommDestroy(&comm));
5055 
5056   /* attach the supporting struct to B_mpi for reuse */
5057   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5058   PetscCall(PetscContainerSetPointer(container, merge));
5059   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5060   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5061   PetscCall(PetscContainerDestroy(&container));
5062   *mpimat = B_mpi;
5063 
5064   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5065   PetscFunctionReturn(PETSC_SUCCESS);
5066 }
5067 
5068 /*@
5069   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5070   matrices from each processor
5071 
5072   Collective
5073 
5074   Input Parameters:
5075 + comm   - the communicator the parallel matrix will live on
5076 . seqmat - the input sequential matrix (one per process)
5077 . m      - number of local rows (or `PETSC_DECIDE`)
5078 . n      - number of local columns (or `PETSC_DECIDE`)
5079 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5080 
5081   Output Parameter:
5082 . mpimat - the parallel matrix generated
5083 
5084   Level: advanced
5085 
5086   Note:
5087   The dimensions of the sequential matrix on each process MUST be the same.
5088   The input `seqmat` is stored in the container "Mat_Merge_SeqsToMPI" and will be
5089   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5090 
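  Example Usage:
  A minimal sketch (error checking with `PetscCall()` omitted; `M`, `N`, and `nnz` are placeholder sizes and preallocation supplied by the caller):
.vb
  Mat seqmat, mpimat;
  MatCreateSeqAIJ(PETSC_COMM_SELF, M, N, 0, nnz, &seqmat); /* the same M x N on every process */
  /* fill seqmat with MatSetValues() and assemble it */
  MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat);
  /* after changing the values (same nonzero pattern) of seqmat, refresh the sum */
  MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat);
  MatDestroy(&mpimat); /* also destroys the contained seqmat, see the Note above */
.ve
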
5091 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5092 @*/
5093 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5094 {
5095   PetscMPIInt size;
5096 
5097   PetscFunctionBegin;
5098   PetscCallMPI(MPI_Comm_size(comm, &size));
5099   if (size == 1) {
5100     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5101     if (scall == MAT_INITIAL_MATRIX) {
5102       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5103     } else {
5104       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5105     }
5106     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5107     PetscFunctionReturn(PETSC_SUCCESS);
5108   }
5109   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5110   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5111   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5112   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5113   PetscFunctionReturn(PETSC_SUCCESS);
5114 }
5115 
5116 /*@
5117   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5118 
5119   Not Collective
5120 
5121   Input Parameter:
5122 . A - the matrix
5123 
5124   Output Parameter:
5125 . A_loc - the local sequential matrix generated
5126 
5127   Level: developer
5128 
5129   Notes:
5130   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5131   with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with
5132   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5133 
5134   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5135 
5136   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5137 
5138   Destroy the matrix with `MatDestroy()`
5139 
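  Example Usage:
  A minimal sketch (error checking with `PetscCall()` omitted), assuming `A` is an assembled `MATAIJ` matrix:
.vb
  Mat A_loc;
  MatAIJGetLocalMat(A, &A_loc); /* A_loc is MATSEQAIJ; for sequential A it is A itself with an extra reference */
  /* ... use A_loc ... */
  MatDestroy(&A_loc);
.ve
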
5140 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5141 @*/
5142 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5143 {
5144   PetscBool mpi;
5145 
5146   PetscFunctionBegin;
5147   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5148   if (mpi) {
5149     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5150   } else {
5151     *A_loc = A;
5152     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5153   }
5154   PetscFunctionReturn(PETSC_SUCCESS);
5155 }
5156 
5157 /*@
5158   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5159 
5160   Not Collective
5161 
5162   Input Parameters:
5163 + A     - the matrix
5164 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5165 
5166   Output Parameter:
5167 . A_loc - the local sequential matrix generated
5168 
5169   Level: developer
5170 
5171   Notes:
5172   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5173   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5174   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5175 
5176   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5177 
5178   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5179   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5180   then `MatCopy(Adiag, *A_loc, SAME_NONZERO_PATTERN)` is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5181   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5182 
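  Example Usage:
  A minimal sketch (error checking with `PetscCall()` omitted), assuming `A` is an assembled `MATMPIAIJ` matrix:
.vb
  Mat A_loc;
  MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
  /* ... the numerical values of A change, the nonzero pattern does not ... */
  MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc); /* refresh the values of A_loc in place */
  MatDestroy(&A_loc);
.ve
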
5183 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5184 @*/
5185 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5186 {
5187   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5188   Mat_SeqAIJ        *mat, *a, *b;
5189   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5190   const PetscScalar *aa, *ba, *aav, *bav;
5191   PetscScalar       *ca, *cam;
5192   PetscMPIInt        size;
5193   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5194   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5195   PetscBool          match;
5196 
5197   PetscFunctionBegin;
5198   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5199   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5200   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5201   if (size == 1) {
5202     if (scall == MAT_INITIAL_MATRIX) {
5203       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5204       *A_loc = mpimat->A;
5205     } else if (scall == MAT_REUSE_MATRIX) {
5206       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5207     }
5208     PetscFunctionReturn(PETSC_SUCCESS);
5209   }
5210 
5211   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5212   a  = (Mat_SeqAIJ *)mpimat->A->data;
5213   b  = (Mat_SeqAIJ *)mpimat->B->data;
5214   ai = a->i;
5215   aj = a->j;
5216   bi = b->i;
5217   bj = b->j;
5218   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5219   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5220   aa = aav;
5221   ba = bav;
5222   if (scall == MAT_INITIAL_MATRIX) {
5223     PetscCall(PetscMalloc1(1 + am, &ci));
5224     ci[0] = 0;
5225     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5226     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5227     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5228     k = 0;
5229     for (i = 0; i < am; i++) {
5230       ncols_o = bi[i + 1] - bi[i];
5231       ncols_d = ai[i + 1] - ai[i];
5232       /* off-diagonal portion of A (global columns before the diagonal block) */
5233       for (jo = 0; jo < ncols_o; jo++) {
5234         col = cmap[*bj];
5235         if (col >= cstart) break;
5236         cj[k] = col;
5237         bj++;
5238         ca[k++] = *ba++;
5239       }
5240       /* diagonal portion of A */
5241       for (j = 0; j < ncols_d; j++) {
5242         cj[k]   = cstart + *aj++;
5243         ca[k++] = *aa++;
5244       }
5245       /* off-diagonal portion of A (global columns after the diagonal block) */
5246       for (j = jo; j < ncols_o; j++) {
5247         cj[k]   = cmap[*bj++];
5248         ca[k++] = *ba++;
5249       }
5250     }
5251     /* put together the new matrix */
5252     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5253     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5254     /* Since these are PETSc arrays, change flags to free them as necessary. */
5255     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5256     mat->free_a  = PETSC_TRUE;
5257     mat->free_ij = PETSC_TRUE;
5258     mat->nonew   = 0;
5259   } else if (scall == MAT_REUSE_MATRIX) {
5260     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5261     ci  = mat->i;
5262     cj  = mat->j;
5263     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5264     for (i = 0; i < am; i++) {
5265       /* off-diagonal portion of A (global columns before the diagonal block) */
5266       ncols_o = bi[i + 1] - bi[i];
5267       for (jo = 0; jo < ncols_o; jo++) {
5268         col = cmap[*bj];
5269         if (col >= cstart) break;
5270         *cam++ = *ba++;
5271         bj++;
5272       }
5273       /* diagonal portion of A */
5274       ncols_d = ai[i + 1] - ai[i];
5275       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5276       /* off-diagonal portion of A (global columns after the diagonal block) */
5277       for (j = jo; j < ncols_o; j++) {
5278         *cam++ = *ba++;
5279         bj++;
5280       }
5281     }
5282     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5283   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5284   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5285   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5286   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5287   PetscFunctionReturn(PETSC_SUCCESS);
5288 }
5289 
5290 /*@
5291   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5292   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5293 
5294   Not Collective
5295 
5296   Input Parameters:
5297 + A     - the matrix
5298 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5299 
5300   Output Parameters:
5301 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5302 - A_loc - the local sequential matrix generated
5303 
5304   Level: developer
5305 
5306   Note:
5307   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5308   part, then those associated with the off-diagonal part (in its local ordering).
5309 
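  Example Usage:
  A minimal sketch (error checking with `PetscCall()` omitted); `glob` translates the local column numbering of `A_loc` (diagonal columns first, then off-diagonal ones) to global indices:
.vb
  Mat             A_loc;
  IS              glob;
  const PetscInt *gidx;
  MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc);
  ISGetIndices(glob, &gidx); /* gidx[c] is the global column of local column c of A_loc */
  /* ... */
  ISRestoreIndices(glob, &gidx);
  ISDestroy(&glob);
  MatDestroy(&A_loc);
.ve
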
5310 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5311 @*/
5312 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5313 {
5314   Mat             Ao, Ad;
5315   const PetscInt *cmap;
5316   PetscMPIInt     size;
5317   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5318 
5319   PetscFunctionBegin;
5320   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5321   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5322   if (size == 1) {
5323     if (scall == MAT_INITIAL_MATRIX) {
5324       PetscCall(PetscObjectReference((PetscObject)Ad));
5325       *A_loc = Ad;
5326     } else if (scall == MAT_REUSE_MATRIX) {
5327       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5328     }
5329     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5330     PetscFunctionReturn(PETSC_SUCCESS);
5331   }
5332   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5333   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5334   if (f) {
5335     PetscCall((*f)(A, scall, glob, A_loc));
5336   } else {
5337     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5338     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5339     Mat_SeqAIJ        *c;
5340     PetscInt          *ai = a->i, *aj = a->j;
5341     PetscInt          *bi = b->i, *bj = b->j;
5342     PetscInt          *ci, *cj;
5343     const PetscScalar *aa, *ba;
5344     PetscScalar       *ca;
5345     PetscInt           i, j, am, dn, on;
5346 
5347     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5348     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5349     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5350     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5351     if (scall == MAT_INITIAL_MATRIX) {
5352       PetscInt k;
5353       PetscCall(PetscMalloc1(1 + am, &ci));
5354       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5355       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5356       ci[0] = 0;
5357       for (i = 0, k = 0; i < am; i++) {
5358         const PetscInt ncols_o = bi[i + 1] - bi[i];
5359         const PetscInt ncols_d = ai[i + 1] - ai[i];
5360         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5361         /* diagonal portion of A */
5362         for (j = 0; j < ncols_d; j++, k++) {
5363           cj[k] = *aj++;
5364           ca[k] = *aa++;
5365         }
5366         /* off-diagonal portion of A */
5367         for (j = 0; j < ncols_o; j++, k++) {
5368           cj[k] = dn + *bj++;
5369           ca[k] = *ba++;
5370         }
5371       }
5372       /* put together the new matrix */
5373       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5374       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5375       /* Since these are PETSc arrays, change flags to free them as necessary. */
5376       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5377       c->free_a  = PETSC_TRUE;
5378       c->free_ij = PETSC_TRUE;
5379       c->nonew   = 0;
5380       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5381     } else if (scall == MAT_REUSE_MATRIX) {
5382       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5383       for (i = 0; i < am; i++) {
5384         const PetscInt ncols_d = ai[i + 1] - ai[i];
5385         const PetscInt ncols_o = bi[i + 1] - bi[i];
5386         /* diagonal portion of A */
5387         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5388         /* off-diagonal portion of A */
5389         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5390       }
5391       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5392     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5393     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5394     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5395     if (glob) {
5396       PetscInt cst, *gidx;
5397 
5398       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5399       PetscCall(PetscMalloc1(dn + on, &gidx));
5400       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5401       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5402       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5403     }
5404   }
5405   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5406   PetscFunctionReturn(PETSC_SUCCESS);
5407 }
5408 
5409 /*@C
5410   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5411 
5412   Not Collective
5413 
5414   Input Parameters:
5415 + A     - the matrix
5416 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5417 . row   - index set of rows to extract (or `NULL`)
5418 - col   - index set of columns to extract (or `NULL`)
5419 
5420   Output Parameter:
5421 . A_loc - the local sequential matrix generated
5422 
5423   Level: developer
5424 
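  Example Usage:
  A minimal sketch (error checking with `PetscCall()` omitted); passing `NULL` for `row` and `col` selects all local rows and the nonzero columns:
.vb
  Mat A_loc;
  MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc);
  /* ... use A_loc; its columns are the condensed nonzero columns of this process ... */
  MatDestroy(&A_loc);
.ve
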
5425 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5426 @*/
5427 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5428 {
5429   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5430   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5431   IS          isrowa, iscola;
5432   Mat        *aloc;
5433   PetscBool   match;
5434 
5435   PetscFunctionBegin;
5436   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5437   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5438   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5439   if (!row) {
5440     start = A->rmap->rstart;
5441     end   = A->rmap->rend;
5442     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5443   } else {
5444     isrowa = *row;
5445   }
5446   if (!col) {
5447     start = A->cmap->rstart;
5448     cmap  = a->garray;
5449     nzA   = a->A->cmap->n;
5450     nzB   = a->B->cmap->n;
5451     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5452     ncols = 0;
5453     for (i = 0; i < nzB; i++) {
5454       if (cmap[i] < start) idx[ncols++] = cmap[i];
5455       else break;
5456     }
5457     imark = i;
5458     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5459     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5460     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5461   } else {
5462     iscola = *col;
5463   }
5464   if (scall != MAT_INITIAL_MATRIX) {
5465     PetscCall(PetscMalloc1(1, &aloc));
5466     aloc[0] = *A_loc;
5467   }
5468   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5469   if (!col) { /* attach global id of condensed columns */
5470     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5471   }
5472   *A_loc = aloc[0];
5473   PetscCall(PetscFree(aloc));
5474   if (!row) PetscCall(ISDestroy(&isrowa));
5475   if (!col) PetscCall(ISDestroy(&iscola));
5476   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5477   PetscFunctionReturn(PETSC_SUCCESS);
5478 }
5479 
5480 /*
5481  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5482  * Create a sequential AIJ matrix based on row indices: all the columns of a row are extracted once the row is matched.
5483  * Rows can be local or remote. The routine is designed to be scalable in memory, so that nothing is sized based
5484  * */
5485 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5486 {
5487   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5488   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5489   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5490   PetscMPIInt            owner;
5491   PetscSFNode           *iremote, *oiremote;
5492   const PetscInt        *lrowindices;
5493   PetscSF                sf, osf;
5494   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5495   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5496   MPI_Comm               comm;
5497   ISLocalToGlobalMapping mapping;
5498   const PetscScalar     *pd_a, *po_a;
5499 
5500   PetscFunctionBegin;
5501   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5502   /* plocalsize is the number of roots
5503    * nrows is the number of leaves
5504    * */
5505   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5506   PetscCall(ISGetLocalSize(rows, &nrows));
5507   PetscCall(PetscCalloc1(nrows, &iremote));
5508   PetscCall(ISGetIndices(rows, &lrowindices));
5509   for (i = 0; i < nrows; i++) {
5510     /* Find a remote index and an owner for a row
5511      * The row could be local or remote
5512      * */
5513     owner = 0;
5514     lidx  = 0;
5515     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5516     iremote[i].index = lidx;
5517     iremote[i].rank  = owner;
5518   }
5519   /* Create SF to communicate how many nonzero columns for each row */
5520   PetscCall(PetscSFCreate(comm, &sf));
5521   /* SF will figure out the number of nonzero columns for each row, and their
5522    * offsets
5523    * */
5524   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5525   PetscCall(PetscSFSetFromOptions(sf));
5526   PetscCall(PetscSFSetUp(sf));
5527 
5528   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5529   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5530   PetscCall(PetscCalloc1(nrows, &pnnz));
5531   roffsets[0] = 0;
5532   roffsets[1] = 0;
5533   for (i = 0; i < plocalsize; i++) {
5534     /* diagonal */
5535     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5536     /* off-diagonal */
5537     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5538     /* compute offsets so that we know the relative location of each row */
5539     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5540     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5541   }
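  /* A hypothetical illustration of the interleaved layout: with plocalsize = 2, row 0 having
     3 diagonal and 1 off-diagonal nonzeros and row 1 having 2 and 4, one gets
     nrcols = {3,1, 2,4} and roffsets = {0,0, 3,1, 5,5}; each MPIU_2INT item broadcast below
     carries one (diag,offdiag) pair per row */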
5542   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5543   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5544   /* 'r' means root, and 'l' means leaf */
5545   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5546   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5547   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5548   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5549   PetscCall(PetscSFDestroy(&sf));
5550   PetscCall(PetscFree(roffsets));
5551   PetscCall(PetscFree(nrcols));
5552   dntotalcols = 0;
5553   ontotalcols = 0;
5554   ncol        = 0;
5555   for (i = 0; i < nrows; i++) {
5556     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5557     ncol    = PetscMax(pnnz[i], ncol);
5558     /* diagonal */
5559     dntotalcols += nlcols[i * 2 + 0];
5560     /* off-diagonal */
5561     ontotalcols += nlcols[i * 2 + 1];
5562   }
5563   /* We do not need to figure out the exact number of columns
5564    * since all the calculations will be done by going through the raw data
5565    * */
5566   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5567   PetscCall(MatSetUp(*P_oth));
5568   PetscCall(PetscFree(pnnz));
5569   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5570   /* diagonal */
5571   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5572   /* off-diagonal */
5573   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5574   /* diagonal */
5575   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5576   /* off-diagonal */
5577   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5578   dntotalcols = 0;
5579   ontotalcols = 0;
5580   ntotalcols  = 0;
5581   for (i = 0; i < nrows; i++) {
5582     owner = 0;
5583     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5584     /* Set iremote for diag matrix */
5585     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5586       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5587       iremote[dntotalcols].rank  = owner;
5588       /* P_oth is SeqAIJ, so ilocal needs to index into its single contiguous value array */
5589       ilocal[dntotalcols++] = ntotalcols++;
5590     }
5591     /* off-diagonal */
5592     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5593       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5594       oiremote[ontotalcols].rank  = owner;
5595       oilocal[ontotalcols++]      = ntotalcols++;
5596     }
5597   }
5598   PetscCall(ISRestoreIndices(rows, &lrowindices));
5599   PetscCall(PetscFree(loffsets));
5600   PetscCall(PetscFree(nlcols));
5601   PetscCall(PetscSFCreate(comm, &sf));
5602   /* P serves as the roots and P_oth as the leaves
5603    * Diagonal portion
5604    * */
5605   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5606   PetscCall(PetscSFSetFromOptions(sf));
5607   PetscCall(PetscSFSetUp(sf));
5608 
5609   PetscCall(PetscSFCreate(comm, &osf));
5610   /* off-diagonal */
5611   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5612   PetscCall(PetscSFSetFromOptions(osf));
5613   PetscCall(PetscSFSetUp(osf));
5614   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5615   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5616   /* operate on the matrix internal data to save memory */
5617   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5618   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5619   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5620   /* Convert to global indices for diag matrix */
5621   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5622   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5623   /* We want P_oth to store global indices */
5624   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5625   /* Use memory scalable approach */
5626   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5627   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5628   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5629   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5630   /* Convert back to local indices */
5631   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5632   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5633   nout = 0;
5634   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5635   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5636   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5637   /* Exchange values */
5638   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5639   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5640   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5641   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5642   /* Stop PETSc from shrinking memory */
5643   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5644   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5645   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5646   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5647   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5648   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5649   PetscCall(PetscSFDestroy(&sf));
5650   PetscCall(PetscSFDestroy(&osf));
5651   PetscFunctionReturn(PETSC_SUCCESS);
5652 }
5653 
5654 /*
5655  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A.
5656  * This supports MPIAIJ and MAIJ matrices.
5657  * */
5658 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5659 {
5660   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5661   Mat_SeqAIJ *p_oth;
5662   IS          rows, map;
5663   PetscHMapI  hamp;
5664   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5665   MPI_Comm    comm;
5666   PetscSF     sf, osf;
5667   PetscBool   has;
5668 
5669   PetscFunctionBegin;
5670   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5671   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5672   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5673    *  and then create a submatrix (that often is an overlapping matrix)
5674    * */
5675   if (reuse == MAT_INITIAL_MATRIX) {
5676     /* Use a hash table to figure out unique keys */
5677     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5678     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5679     count = 0;
5680     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5681     for (i = 0; i < a->B->cmap->n; i++) {
5682       key = a->garray[i] / dof;
5683       PetscCall(PetscHMapIHas(hamp, key, &has));
5684       if (!has) {
5685         mapping[i] = count;
5686         PetscCall(PetscHMapISet(hamp, key, count++));
5687       } else {
5688         /* Current 'i' maps to the same key as the previous entry */
5689         mapping[i] = count - 1;
5690       }
5691     }
5692     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5693     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5694     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5695     PetscCall(PetscCalloc1(htsize, &rowindices));
5696     off = 0;
5697     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5698     PetscCall(PetscHMapIDestroy(&hamp));
5699     PetscCall(PetscSortInt(htsize, rowindices));
5700     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5701     /* In case the matrix was already created but the user wants to recreate it */
5702     PetscCall(MatDestroy(P_oth));
5703     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5704     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5705     PetscCall(ISDestroy(&map));
5706     PetscCall(ISDestroy(&rows));
5707   } else if (reuse == MAT_REUSE_MATRIX) {
5708     /* If matrix was already created, we simply update values using SF objects
5709      * that were attached to the matrix earlier.
5710      */
5711     const PetscScalar *pd_a, *po_a;
5712 
5713     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5714     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5715     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5716     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5717     /* Update values in place */
5718     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5719     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5720     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5721     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5722     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5723     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5724     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5725     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5726   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5727   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5728   PetscFunctionReturn(PETSC_SUCCESS);
5729 }
5730 
5731 /*@C
5732   MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that correspond to the nonzero columns of local `A`
5733 
5734   Collective
5735 
5736   Input Parameters:
5737 + A     - the first matrix in `MATMPIAIJ` format
5738 . B     - the second matrix in `MATMPIAIJ` format
5739 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5740 
5741   Output Parameters:
5742 + rowb  - on input, the index set of rows of `B` to extract (or `NULL`); modified on output
5743 . colb  - on input, the index set of columns of `B` to extract (or `NULL`); modified on output
5744 - B_seq - the sequential matrix generated
5745 
5746   Level: developer
5747 
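  Example Usage:
  A minimal sketch (error checking with `PetscCall()` omitted), assuming `A` and `B` are assembled `MATMPIAIJ` matrices:
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq = NULL;
  MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq);
  /* ... the numerical values of B change, the nonzero pattern does not ... */
  MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq);
  ISDestroy(&rowb);
  ISDestroy(&colb);
  MatDestroy(&B_seq);
.ve
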
5748 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5749 @*/
5750 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5751 {
5752   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5753   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5754   IS          isrowb, iscolb;
5755   Mat        *bseq = NULL;
5756 
5757   PetscFunctionBegin;
5758   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5759              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5760   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5761 
5762   if (scall == MAT_INITIAL_MATRIX) {
5763     start = A->cmap->rstart;
5764     cmap  = a->garray;
5765     nzA   = a->A->cmap->n;
5766     nzB   = a->B->cmap->n;
5767     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5768     ncols = 0;
5769     for (i = 0; i < nzB; i++) { /* row < local row index */
5770       if (cmap[i] < start) idx[ncols++] = cmap[i];
5771       else break;
5772     }
5773     imark = i;
5774     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5775     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5776     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5777     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5778   } else {
5779     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5780     isrowb = *rowb;
5781     iscolb = *colb;
5782     PetscCall(PetscMalloc1(1, &bseq));
5783     bseq[0] = *B_seq;
5784   }
5785   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5786   *B_seq = bseq[0];
5787   PetscCall(PetscFree(bseq));
5788   if (!rowb) {
5789     PetscCall(ISDestroy(&isrowb));
5790   } else {
5791     *rowb = isrowb;
5792   }
5793   if (!colb) {
5794     PetscCall(ISDestroy(&iscolb));
5795   } else {
5796     *colb = iscolb;
5797   }
5798   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5799   PetscFunctionReturn(PETSC_SUCCESS);
5800 }
5801 
5802 /*
5803     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5804     of the OFF-DIAGONAL portion of local A
5805 
5806     Collective
5807 
5808    Input Parameters:
5809 +    A,B - the matrices in `MATMPIAIJ` format
5810 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5811 
5812    Output Parameters:
5813 +    startsj_s - starting points in B's sending j-arrays, saved for `MAT_REUSE_MATRIX` (or NULL)
5814 .    startsj_r - starting points in B's receiving j-arrays, saved for `MAT_REUSE_MATRIX` (or NULL)
5815 .    bufa_ptr - array for sending matrix values, saved for `MAT_REUSE_MATRIX` (or NULL)
5816 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5817 
5818     Developer Note:
5819     This directly accesses information inside the VecScatter associated with the matrix-vector product
5820      for this matrix. This is not desirable.
5821 
5822     Level: developer
5823 
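    Example calling sequence (a sketch; the arrays returned for `MAT_INITIAL_MATRIX` are kept by
    the caller and passed back unchanged for the `MAT_REUSE_MATRIX` call):
.vb
    PetscInt  *startsj_s = NULL, *startsj_r = NULL;
    MatScalar *bufa = NULL;
    Mat        B_oth = NULL;
    MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth);
    /* ... the numerical values of B change, the nonzero pattern does not ... */
    MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth);
.ve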
5824 */
5825 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5826 {
5827   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5828   Mat_SeqAIJ        *b_oth;
5829   VecScatter         ctx;
5830   MPI_Comm           comm;
5831   const PetscMPIInt *rprocs, *sprocs;
5832   const PetscInt    *srow, *rstarts, *sstarts;
5833   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5834   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5835   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5836   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5837   PetscMPIInt        size, tag, rank, nreqs;
5838 
5839   PetscFunctionBegin;
5840   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5841   PetscCallMPI(MPI_Comm_size(comm, &size));
5842 
5843   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5844              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5845   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5846   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5847 
5848   if (size == 1) {
5849     if (startsj_s) *startsj_s = NULL;
5850     if (bufa_ptr) *bufa_ptr = NULL;
5851     *B_oth    = NULL;
5852     PetscFunctionReturn(PETSC_SUCCESS);
5853   }
5854 
5855   ctx = a->Mvctx;
5856   tag = ((PetscObject)ctx)->tag;
5857 
5858   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5859   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5860   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5861   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5862   PetscCall(PetscMalloc1(nreqs, &reqs));
5863   rwaits = reqs;
5864   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5865 
5866   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5867   if (scall == MAT_INITIAL_MATRIX) {
5868     /* i-array */
5869     /*  post receives */
5870     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5871     for (i = 0; i < nrecvs; i++) {
5872       rowlen = rvalues + rstarts[i] * rbs;
5873       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5874       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5875     }
5876 
5877     /* pack the outgoing message */
5878     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5879 
5880     sstartsj[0] = 0;
5881     rstartsj[0] = 0;
5882     len         = 0; /* total length of j or a array to be sent */
5883     if (nsends) {
5884       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5885       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5886     }
5887     for (i = 0; i < nsends; i++) {
5888       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5889       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5890       for (j = 0; j < nrows; j++) {
5891         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5892         for (l = 0; l < sbs; l++) {
5893           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5894 
5895           rowlen[j * sbs + l] = ncols;
5896 
5897           len += ncols;
5898           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5899         }
5900         k++;
5901       }
5902       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5903 
5904       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5905     }
5906     /* recvs and sends of i-array are completed */
5907     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5908     PetscCall(PetscFree(svalues));
5909 
5910     /* allocate buffers for sending j and a arrays */
5911     PetscCall(PetscMalloc1(len + 1, &bufj));
5912     PetscCall(PetscMalloc1(len + 1, &bufa));
5913 
5914     /* create i-array of B_oth */
5915     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5916 
5917     b_othi[0] = 0;
5918     len       = 0; /* total length of j or a array to be received */
5919     k         = 0;
5920     for (i = 0; i < nrecvs; i++) {
5921       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5922       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5923       for (j = 0; j < nrows; j++) {
5924         b_othi[k + 1] = b_othi[k] + rowlen[j];
5925         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5926         k++;
5927       }
5928       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5929     }
5930     PetscCall(PetscFree(rvalues));
5931 
5932     /* allocate space for j and a arrays of B_oth */
5933     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5934     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5935 
5936     /* j-array */
5937     /*  post receives of j-array */
5938     for (i = 0; i < nrecvs; i++) {
5939       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5940       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5941     }
5942 
5943     /* pack the outgoing message j-array */
5944     if (nsends) k = sstarts[0];
5945     for (i = 0; i < nsends; i++) {
5946       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5947       bufJ  = bufj + sstartsj[i];
5948       for (j = 0; j < nrows; j++) {
5949         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5950         for (ll = 0; ll < sbs; ll++) {
5951           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5952           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5953           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5954         }
5955       }
5956       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5957     }
5958 
5959     /* recvs and sends of j-array are completed */
5960     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5961   } else if (scall == MAT_REUSE_MATRIX) {
5962     sstartsj = *startsj_s;
5963     rstartsj = *startsj_r;
5964     bufa     = *bufa_ptr;
5965     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5966     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5967   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5968 
5969   /* a-array */
5970   /*  post receives of a-array */
5971   for (i = 0; i < nrecvs; i++) {
5972     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5973     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5974   }
5975 
5976   /* pack the outgoing message a-array */
5977   if (nsends) k = sstarts[0];
5978   for (i = 0; i < nsends; i++) {
5979     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5980     bufA  = bufa + sstartsj[i];
5981     for (j = 0; j < nrows; j++) {
5982       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5983       for (ll = 0; ll < sbs; ll++) {
5984         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5985         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5986         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5987       }
5988     }
5989     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5990   }
5991   /* recvs and sends of a-array are completed */
5992   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5993   PetscCall(PetscFree(reqs));
5994 
5995   if (scall == MAT_INITIAL_MATRIX) {
5996     /* put together the new matrix */
5997     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5998 
5999     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6000     /* Since these are PETSc arrays, change flags to free them as necessary. */
6001     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6002     b_oth->free_a  = PETSC_TRUE;
6003     b_oth->free_ij = PETSC_TRUE;
6004     b_oth->nonew   = 0;
6005 
6006     PetscCall(PetscFree(bufj));
6007     if (!startsj_s || !bufa_ptr) {
6008       PetscCall(PetscFree2(sstartsj, rstartsj));
6009       PetscCall(PetscFree(bufa));
6010     } else {
6011       *startsj_s = sstartsj;
6012       *startsj_r = rstartsj;
6013       *bufa_ptr  = bufa;
6014     }
6015   } else if (scall == MAT_REUSE_MATRIX) {
6016     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6017   }
6018 
6019   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6020   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6021   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6022   PetscFunctionReturn(PETSC_SUCCESS);
6023 }
6024 
6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6028 #if defined(PETSC_HAVE_MKL_SPARSE)
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6030 #endif
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6033 #if defined(PETSC_HAVE_ELEMENTAL)
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6035 #endif
6036 #if defined(PETSC_HAVE_SCALAPACK)
6037 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6038 #endif
6039 #if defined(PETSC_HAVE_HYPRE)
6040 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6041 #endif
6042 #if defined(PETSC_HAVE_CUDA)
6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6044 #endif
6045 #if defined(PETSC_HAVE_HIP)
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6047 #endif
6048 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6050 #endif
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6052 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6053 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6054 
6055 /*
6056     Computes (B'*A')' since computing A*B directly is untenable
6057 
6058                n                       p                          p
6059         [             ]       [             ]         [                 ]
6060       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6061         [             ]       [             ]         [                 ]
6062 
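    Here A is MPIDense and B is MPIAIJ; forming Ct = Bt*At instead yields an
    MPIAIJ-times-MPIDense product, which PETSc supports natively, and Ct is then
    transposed back into the dense result C.
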
6063 */
6064 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6065 {
6066   Mat At, Bt, Ct;
6067 
6068   PetscFunctionBegin;
6069   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6070   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6071   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6072   PetscCall(MatDestroy(&At));
6073   PetscCall(MatDestroy(&Bt));
6074   PetscCall(MatTransposeSetPrecursor(Ct, C));
6075   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6076   PetscCall(MatDestroy(&Ct));
6077   PetscFunctionReturn(PETSC_SUCCESS);
6078 }
6079 
6080 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6081 {
6082   PetscBool cisdense;
6083 
6084   PetscFunctionBegin;
6085   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6086   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6087   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6088   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6089   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6090   PetscCall(MatSetUp(C));
6091 
6092   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6093   PetscFunctionReturn(PETSC_SUCCESS);
6094 }
6095 
6096 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6097 {
6098   Mat_Product *product = C->product;
6099   Mat          A = product->A, B = product->B;
6100 
6101   PetscFunctionBegin;
6102   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6103              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6104   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6105   C->ops->productsymbolic = MatProductSymbolic_AB;
6106   PetscFunctionReturn(PETSC_SUCCESS);
6107 }
6108 
6109 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6110 {
6111   Mat_Product *product = C->product;
6112 
6113   PetscFunctionBegin;
6114   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6115   PetscFunctionReturn(PETSC_SUCCESS);
6116 }
6117 
6118 /*
6119    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6120 
6121   Input Parameters:
6122 
6123     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6124     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6125 
6126     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6127 
6128     For Set1, j1[] contains column indices of the nonzeros.
6129     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6130     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6131     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6132 
6133     Similar for Set2.
6134 
6135     This routine merges the two sets of nonzeros row by row and removes repeats.
6136 
6137   Output Parameters: (memory is allocated by the caller)
6138 
6139     i[],j[]: the CSR of the merged matrix, which has m rows.
6140     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6141     imap2[]: similar to imap1[], but for Set2.
6142     Note we order nonzeros row-by-row and from left to right.
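
    A hypothetical single-row example: with j1[] = {0,0,3} (so jmap1[] = {0,2,3}) and
    j2[] = {1,3,3} (so jmap2[] = {0,1,3}), the merged row is j[] = {0,1,3} with
    imap1[] = {0,2} and imap2[] = {1,2}: Set1's unique nonzeros land at positions 0 and 2
    of the merged row, Set2's at positions 1 and 2.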
6143 */
6144 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6145 {
6146   PetscInt   r, m; /* Row index of mat */
6147   PetscCount t, t1, t2, b1, e1, b2, e2;
6148 
6149   PetscFunctionBegin;
6150   PetscCall(MatGetLocalSize(mat, &m, NULL));
6151   t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
6152   i[0]        = 0;
6153   for (r = 0; r < m; r++) { /* Do row by row merging */
6154     b1 = rowBegin1[r];
6155     e1 = rowEnd1[r];
6156     b2 = rowBegin2[r];
6157     e2 = rowEnd2[r];
6158     while (b1 < e1 && b2 < e2) {
6159       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6160         j[t]      = j1[b1];
6161         imap1[t1] = t;
6162         imap2[t2] = t;
6163         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero of Set1 */
6164         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero of Set2 */
6165         t1++;
6166         t2++;
6167         t++;
6168       } else if (j1[b1] < j2[b2]) {
6169         j[t]      = j1[b1];
6170         imap1[t1] = t;
6171         b1 += jmap1[t1 + 1] - jmap1[t1];
6172         t1++;
6173         t++;
6174       } else {
6175         j[t]      = j2[b2];
6176         imap2[t2] = t;
6177         b2 += jmap2[t2 + 1] - jmap2[t2];
6178         t2++;
6179         t++;
6180       }
6181     }
6182     /* Merge the remaining in either j1[] or j2[] */
6183     while (b1 < e1) {
6184       j[t]      = j1[b1];
6185       imap1[t1] = t;
6186       b1 += jmap1[t1 + 1] - jmap1[t1];
6187       t1++;
6188       t++;
6189     }
6190     while (b2 < e2) {
6191       j[t]      = j2[b2];
6192       imap2[t2] = t;
6193       b2 += jmap2[t2 + 1] - jmap2[t2];
6194       t2++;
6195       t++;
6196     }
6197     i[r + 1] = t;
6198   }
6199   PetscFunctionReturn(PETSC_SUCCESS);
6200 }
6201 
6202 /*
6203   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6204 
6205   Input Parameters:
6206     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6207     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6208       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6209 
6210       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6211       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6212 
6213   Output Parameters:
6214     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6215     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6216       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6217       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6218 
6219     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6220       Atot: number of entries belonging to the diagonal block.
6221       Annz: number of unique nonzeros belonging to the diagonal block.
6222       Aperm[] has length Atot and stores the values of perm[] for the entries belonging to the diagonal block; repeated
6223         entries (i.e., with the same 'i,j' pair) are counted individually.
6224       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6225         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6229 
6230     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6231 
6232     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
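
    A hypothetical example: on a process owning columns [cstart,cend) = [4,8), a row entered with
    j[] = {9,4,4,1} is sorted and partitioned into the diagonal part {4,4} and the off-diagonal
    part {1,9}; it contributes 2 to Atot and 1 to Annz (the unique column 4 has an Ajmap-recorded
    repeat count of 2), and 2 to both Btot and Bnnz.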
6233 */
6234 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6235 {
6236   PetscInt    cstart, cend, rstart, rend, row, col;
6237   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6238   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6239   PetscCount  k, m, p, q, r, s, mid;
6240   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6241 
6242   PetscFunctionBegin;
6243   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6244   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6245   m = rend - rstart;
6246 
6247   /* Skip negative rows */
6248   for (k = 0; k < n; k++)
6249     if (i[k] >= 0) break;
6250 
6251   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6252      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6253   */
6254   while (k < n) {
6255     row = i[k];
6256     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6257     for (s = k; s < n; s++)
6258       if (i[s] != row) break;
6259 
6260     /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
6261     for (p = k; p < s; p++) {
6262       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
6263       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6264     }
6265     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6266     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6267     rowBegin[row - rstart] = k;
6268     rowMid[row - rstart]   = mid;
6269     rowEnd[row - rstart]   = s;
6270 
6271     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6272     Atot += mid - k;
6273     Btot += s - mid;
6274 
6275     /* Count unique nonzeros of this diag row */
6276     for (p = k; p < mid;) {
6277       col = j[p];
6278       do {
6279         j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
6280         p++;
6281       } while (p < mid && j[p] == col);
6282       Annz++;
6283     }
6284 
6285     /* Count unique nonzeros of this offdiag row */
6286     for (p = mid; p < s;) {
6287       col = j[p];
6288       do {
6289         p++;
6290       } while (p < s && j[p] == col);
6291       Bnnz++;
6292     }
6293     k = s;
6294   }
6295 
6296   /* Allocation according to Atot, Btot, Annz, Bnnz */
6297   PetscCall(PetscMalloc1(Atot, &Aperm));
6298   PetscCall(PetscMalloc1(Btot, &Bperm));
6299   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6300   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6301 
6302   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6303   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6304   for (r = 0; r < m; r++) {
6305     k   = rowBegin[r];
6306     mid = rowMid[r];
6307     s   = rowEnd[r];
6308     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6309     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6310     Atot += mid - k;
6311     Btot += s - mid;
6312 
6313     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6314     for (p = k; p < mid;) {
6315       col = j[p];
6316       q   = p;
6317       do {
6318         p++;
6319       } while (p < mid && j[p] == col);
6320       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6321       Annz++;
6322     }
6323 
6324     for (p = mid; p < s;) {
6325       col = j[p];
6326       q   = p;
6327       do {
6328         p++;
6329       } while (p < s && j[p] == col);
6330       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6331       Bnnz++;
6332     }
6333   }
6334   /* Output */
6335   *Aperm_ = Aperm;
6336   *Annz_  = Annz;
6337   *Atot_  = Atot;
6338   *Ajmap_ = Ajmap;
6339   *Bperm_ = Bperm;
6340   *Bnnz_  = Bnnz;
6341   *Btot_  = Btot;
6342   *Bjmap_ = Bjmap;
6343   PetscFunctionReturn(PETSC_SUCCESS);
6344 }
6345 
6346 /*
6347   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6348 
6349   Input Parameters:
6350     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6351     nnz:  number of unique nonzeros in the merged matrix
6352     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6353     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6354 
6355   Output Parameter: (memory is allocated by the caller)
6356     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6357 
6358   Example:
6359     nnz1 = 4
6360     nnz  = 6
6361     imap = [1,3,4,5]
6362     jmap = [0,3,5,6,7]
6363    then,
6364     jmap_new = [0,0,3,3,5,6,7]
6365 */
6366 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6367 {
6368   PetscCount k, p;
6369 
6370   PetscFunctionBegin;
6371   jmap_new[0] = 0;
6372   p           = nnz;                /* p loops over jmap_new[] backwards */
6373   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6374     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6375   }
6376   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6377   PetscFunctionReturn(PETSC_SUCCESS);
6378 }
6379 
6380 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6381 {
6382   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6383 
6384   PetscFunctionBegin;
6385   PetscCall(PetscSFDestroy(&coo->sf));
6386   PetscCall(PetscFree(coo->Aperm1));
6387   PetscCall(PetscFree(coo->Bperm1));
6388   PetscCall(PetscFree(coo->Ajmap1));
6389   PetscCall(PetscFree(coo->Bjmap1));
6390   PetscCall(PetscFree(coo->Aimap2));
6391   PetscCall(PetscFree(coo->Bimap2));
6392   PetscCall(PetscFree(coo->Aperm2));
6393   PetscCall(PetscFree(coo->Bperm2));
6394   PetscCall(PetscFree(coo->Ajmap2));
6395   PetscCall(PetscFree(coo->Bjmap2));
6396   PetscCall(PetscFree(coo->Cperm1));
6397   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6398   PetscCall(PetscFree(coo));
6399   PetscFunctionReturn(PETSC_SUCCESS);
6400 }
6401 
6402 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6403 {
6404   MPI_Comm             comm;
6405   PetscMPIInt          rank, size;
6406   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6407   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6408   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6409   PetscContainer       container;
6410   MatCOOStruct_MPIAIJ *coo;
6411 
6412   PetscFunctionBegin;
6413   PetscCall(PetscFree(mpiaij->garray));
6414   PetscCall(VecDestroy(&mpiaij->lvec));
6415 #if defined(PETSC_USE_CTABLE)
6416   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6417 #else
6418   PetscCall(PetscFree(mpiaij->colmap));
6419 #endif
6420   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6421   mat->assembled     = PETSC_FALSE;
6422   mat->was_assembled = PETSC_FALSE;
6423 
6424   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6425   PetscCallMPI(MPI_Comm_size(comm, &size));
6426   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6427   PetscCall(PetscLayoutSetUp(mat->rmap));
6428   PetscCall(PetscLayoutSetUp(mat->cmap));
6429   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6430   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6431   PetscCall(MatGetLocalSize(mat, &m, &n));
6432   PetscCall(MatGetSize(mat, &M, &N));
6433 
6434   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6435   /* entries come first, then local rows, then remote rows.                     */
6436   PetscCount n1 = coo_n, *perm1;
6437   PetscInt  *i1 = coo_i, *j1 = coo_j;
6438 
6439   PetscCall(PetscMalloc1(n1, &perm1));
6440   for (k = 0; k < n1; k++) perm1[k] = k;
6441 
6442   /* Manipulate indices so that entries with negative row or col indices will have smallest
6443      row indices, local entries will have greater but negative row indices, and remote entries
6444      will have positive row indices.
6445   */
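  /* For illustration, suppose this rank hypothetically owns rows [10,20):
       an invalid entry (i,j) = (-1,5) gets i1[k] = PETSC_MIN_INT               (sorts first)
       a local entry    (i,j) = (12,5) gets i1[k] = 12 - PETSC_MAX_INT < 0      (sorts next)
       a remote entry   (i,j) = (42,5) keeps i1[k] = 42                         (sorts last)
  */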
6446   for (k = 0; k < n1; k++) {
6447     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6448     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6449     else {
6450       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but attempting to insert values into remote rows");
6451       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6452     }
6453   }
6454 
6455   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6456   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6457 
6458   /* Advance k to the first entry we need to take care of */
6459   for (k = 0; k < n1; k++)
6460     if (i1[k] > PETSC_MIN_INT) break;
6461   PetscInt i1start = k;
6462 
6463   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6464   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows */
6465 
6466   /*           Send remote rows to their owner                                  */
6467   /* Find which rows should be sent to which remote ranks*/
6468   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6469   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6470   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6471   const PetscInt *ranges;
6472   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6473 
6474   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6475   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6476   for (k = rem; k < n1;) {
6477     PetscMPIInt owner;
6478     PetscInt    firstRow, lastRow;
6479 
6480     /* Locate a row range */
6481     firstRow = i1[k]; /* first row of this owner */
6482     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6483     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6484 
6485     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6486     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6487 
6488     /* All entries in [k,p) belong to this remote owner */
6489     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6490       PetscMPIInt *sendto2;
6491       PetscInt    *nentries2;
6492       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6493 
6494       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6495       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6495       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6496       PetscCall(PetscFree2(sendto, nentries));
6498       sendto   = sendto2;
6499       nentries = nentries2;
6500       maxNsend = maxNsend2;
6501     }
6502     sendto[nsend]   = owner;
6503     PetscCall(PetscCountCast(p - k, &nentries[nsend])); /* nentries[nsend] = p - k, with an overflow check */
6505     nsend++;
6506     k = p;
6507   }
6508 
6509   /* Build 1st SF to know offsets on remote to send data */
6510   PetscSF      sf1;
6511   PetscInt     nroots = 1, nroots2 = 0;
6512   PetscInt     nleaves = nsend, nleaves2 = 0;
6513   PetscInt    *offsets;
6514   PetscSFNode *iremote;
6515 
6516   PetscCall(PetscSFCreate(comm, &sf1));
6517   PetscCall(PetscMalloc1(nsend, &iremote));
6518   PetscCall(PetscMalloc1(nsend, &offsets));
6519   for (k = 0; k < nsend; k++) {
6520     iremote[k].rank  = sendto[k];
6521     iremote[k].index = 0;
6522     nleaves2 += nentries[k];
6523     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6524   }
6525   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6526   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6527   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6528   PetscCall(PetscSFDestroy(&sf1));
6529   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
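  /* For illustration: if this rank sends 3 entries to rank 2 and 5 entries to rank 7, then nentries[] = {3,5};
     the fetch-and-add above returns in offsets[] the starting slots reserved for this rank in the receive
     space of ranks 2 and 7, and each rank's nroots2 ends up as the total number of entries it will receive. */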
6530 
6531   /* Build 2nd SF to send remote COOs to their owner */
6532   PetscSF sf2;
6533   nroots  = nroots2;
6534   nleaves = nleaves2;
6535   PetscCall(PetscSFCreate(comm, &sf2));
6536   PetscCall(PetscSFSetFromOptions(sf2));
6537   PetscCall(PetscMalloc1(nleaves, &iremote));
6538   p = 0;
6539   for (k = 0; k < nsend; k++) {
6540     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6541     for (q = 0; q < nentries[k]; q++, p++) {
6542       iremote[p].rank  = sendto[k];
6543       iremote[p].index = offsets[k] + q;
6544     }
6545   }
6546   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
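  /* For illustration: leaf p of sf2 is the p-th remote entry on this rank, and it maps to root slot
     offsets[k] + q on its owner, so every owner receives its foreign entries in one contiguous
     rootdata array of length nroots2. */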
6547 
6548   /* Send the remote COOs to their owner */
6549   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6550   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6551   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6552   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6553   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6554   PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */
6555   PetscInt *j1prem = j1 ? j1 + rem : NULL;
6556   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6557   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6559   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6560 
6561   PetscCall(PetscFree(offsets));
6562   PetscCall(PetscFree2(sendto, nentries));
6563 
6564   /* Sort received COOs by row along with the permutation array     */
6565   for (k = 0; k < n2; k++) perm2[k] = k;
6566   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6567 
6568   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6569   PetscCount *Cperm1;
6570   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6571   PetscCount *perm1prem = perm1 ? perm1 + rem : NULL;
6572   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6573   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6574 
6575   /* Support for HYPRE matrices, kind of a hack.
6576      Swap min column with diagonal so that diagonal values will go first */
6577   PetscBool   hypre;
6578   const char *name;
6579   PetscCall(PetscObjectGetName((PetscObject)mat, &name));
6580   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre));
6581   if (hypre) {
6582     PetscInt *minj;
6583     PetscBT   hasdiag;
6584 
6585     PetscCall(PetscBTCreate(m, &hasdiag));
6586     PetscCall(PetscMalloc1(m, &minj));
6587     for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT;
6588     for (k = i1start; k < rem; k++) {
6589       if (j1[k] < cstart || j1[k] >= cend) continue;
6590       const PetscInt rindex = i1[k] - rstart;
6591       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6592       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6593     }
6594     for (k = 0; k < n2; k++) {
6595       if (j2[k] < cstart || j2[k] >= cend) continue;
6596       const PetscInt rindex = i2[k] - rstart;
6597       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6598       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6599     }
6600     for (k = i1start; k < rem; k++) {
6601       const PetscInt rindex = i1[k] - rstart;
6602       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6603       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6604       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6605     }
6606     for (k = 0; k < n2; k++) {
6607       const PetscInt rindex = i2[k] - rstart;
6608       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6609       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6610       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6611     }
6612     PetscCall(PetscBTDestroy(&hasdiag));
6613     PetscCall(PetscFree(minj));
6614   }
6615 
6616   /* Split local COOs and received COOs into diag/offdiag portions */
6617   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6618   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6619   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6620   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6621   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6622   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6623 
6624   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6625   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6626   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6627   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6628 
6629   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6630   PetscInt *Ai, *Bi;
6631   PetscInt *Aj, *Bj;
6632 
6633   PetscCall(PetscMalloc1(m + 1, &Ai));
6634   PetscCall(PetscMalloc1(m + 1, &Bi));
6635   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6636   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6637 
6638   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6639   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6640   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6641   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6642   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6643 
6644   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6645   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6646 
6647   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we expect */
6648   /* most nonzeros in A/B to have local contributing entries                      */
6649   PetscInt    Annz = Ai[m];
6650   PetscInt    Bnnz = Bi[m];
6651   PetscCount *Ajmap1_new, *Bjmap1_new;
6652 
6653   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6654   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6655 
6656   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6657   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6658 
6659   PetscCall(PetscFree(Aimap1));
6660   PetscCall(PetscFree(Ajmap1));
6661   PetscCall(PetscFree(Bimap1));
6662   PetscCall(PetscFree(Bjmap1));
6663   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6664   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6665   PetscCall(PetscFree(perm1));
6666   PetscCall(PetscFree3(i2, j2, perm2));
6667 
6668   Ajmap1 = Ajmap1_new;
6669   Bjmap1 = Bjmap1_new;
6670 
6671   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6672   if (Annz < Annz1 + Annz2) {
6673     PetscInt *Aj_new;
6674     PetscCall(PetscMalloc1(Annz, &Aj_new));
6675     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6676     PetscCall(PetscFree(Aj));
6677     Aj = Aj_new;
6678   }
6679 
6680   if (Bnnz < Bnnz1 + Bnnz2) {
6681     PetscInt *Bj_new;
6682     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6683     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6684     PetscCall(PetscFree(Bj));
6685     Bj = Bj_new;
6686   }
6687 
6688   /* Create new submatrices for on-process and off-process coupling                  */
6689   PetscScalar     *Aa, *Ba;
6690   MatType          rtype;
6691   Mat_SeqAIJ      *a, *b;
6692   PetscObjectState state;
6693   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6694   PetscCall(PetscCalloc1(Bnnz, &Ba));
6695   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6696   if (cstart) {
6697     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6698   }
6699 
6700   PetscCall(MatGetRootType_Private(mat, &rtype));
6701 
6702   MatSeqXAIJGetOptions_Private(mpiaij->A);
6703   PetscCall(MatDestroy(&mpiaij->A));
6704   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6705   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6706   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6707 
6708   MatSeqXAIJGetOptions_Private(mpiaij->B);
6709   PetscCall(MatDestroy(&mpiaij->B));
6710   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6711   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6712   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6713 
6714   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6715   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6716   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6717   PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6718 
6719   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6720   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6721   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let mpiaij->A and mpiaij->B own Ai,Aj,Aa,Bi,Bj,Ba */
6722   a->free_a = b->free_a = PETSC_TRUE;
6723   a->free_ij = b->free_ij = PETSC_TRUE;
6724 
6725   /* conversion must happen AFTER multiply setup */
6726   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6727   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6728   PetscCall(VecDestroy(&mpiaij->lvec));
6729   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6730 
6731   // Put the COO struct in a container and then attach that to the matrix
6732   PetscCall(PetscMalloc1(1, &coo));
6733   coo->n       = coo_n;
6734   coo->sf      = sf2;
6735   coo->sendlen = nleaves;
6736   coo->recvlen = nroots;
6737   coo->Annz    = Annz;
6738   coo->Bnnz    = Bnnz;
6739   coo->Annz2   = Annz2;
6740   coo->Bnnz2   = Bnnz2;
6741   coo->Atot1   = Atot1;
6742   coo->Atot2   = Atot2;
6743   coo->Btot1   = Btot1;
6744   coo->Btot2   = Btot2;
6745   coo->Ajmap1  = Ajmap1;
6746   coo->Aperm1  = Aperm1;
6747   coo->Bjmap1  = Bjmap1;
6748   coo->Bperm1  = Bperm1;
6749   coo->Aimap2  = Aimap2;
6750   coo->Ajmap2  = Ajmap2;
6751   coo->Aperm2  = Aperm2;
6752   coo->Bimap2  = Bimap2;
6753   coo->Bjmap2  = Bjmap2;
6754   coo->Bperm2  = Bperm2;
6755   coo->Cperm1  = Cperm1;
6756   // Allocate the send/recv buffers at preallocation time; if they end up unused, they have essentially zero cost on the host
6757   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6758   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6759   PetscCall(PetscContainerSetPointer(container, coo));
6760   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6761   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6762   PetscCall(PetscContainerDestroy(&container));
6763   PetscFunctionReturn(PETSC_SUCCESS);
6764 }
6765 
6766 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6767 {
6768   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6769   Mat                  A = mpiaij->A, B = mpiaij->B;
6770   PetscScalar         *Aa, *Ba;
6771   PetscScalar         *sendbuf, *recvbuf;
6772   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6773   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6774   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6775   const PetscCount    *Cperm1;
6776   PetscContainer       container;
6777   MatCOOStruct_MPIAIJ *coo;
6778 
6779   PetscFunctionBegin;
6780   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6781   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6782   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6783   sendbuf = coo->sendbuf;
6784   recvbuf = coo->recvbuf;
6785   Ajmap1  = coo->Ajmap1;
6786   Ajmap2  = coo->Ajmap2;
6787   Aimap2  = coo->Aimap2;
6788   Bjmap1  = coo->Bjmap1;
6789   Bjmap2  = coo->Bjmap2;
6790   Bimap2  = coo->Bimap2;
6791   Aperm1  = coo->Aperm1;
6792   Aperm2  = coo->Aperm2;
6793   Bperm1  = coo->Bperm1;
6794   Bperm2  = coo->Bperm2;
6795   Cperm1  = coo->Cperm1;
6796 
6797   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6798   PetscCall(MatSeqAIJGetArray(B, &Ba));
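  /* For illustration of the maps used below (hypothetical values): with Ajmap1 = [0,2,3] and
     Aperm1 = [5,9,4], nonzero 0 of A accumulates v[5] + v[9] and nonzero 1 accumulates v[4];
     INSERT_VALUES zeroes an entry before the sum is applied, ADD_VALUES adds the sum to it. */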
6799 
6800   /* Pack entries to be sent to remote */
6801   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6802 
6803   /* Send remote entries to their owner and overlap the communication with local computation */
6804   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6805   /* Add local entries to A and B */
6806   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6807     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6808     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6809     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6810   }
6811   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6812     PetscScalar sum = 0.0;
6813     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6814     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6815   }
6816   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6817 
6818   /* Add received remote entries to A and B */
6819   for (PetscCount i = 0; i < coo->Annz2; i++) {
6820     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6821   }
6822   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6823     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6824   }
6825   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6826   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6827   PetscFunctionReturn(PETSC_SUCCESS);
6828 }
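
/* A minimal COO assembly sketch (illustrative indices and values), driving the two callbacks
   above through the public interface:

     PetscInt    ci[] = {0, 0, 1}, cj[] = {0, 2, 1}; // global indices; repeated (i,j) pairs are summed
     PetscScalar cv[] = {1.0, 2.0, 3.0};
     PetscCall(MatSetPreallocationCOO(A, 3, ci, cj)); // builds the maps and the SF once
     PetscCall(MatSetValuesCOO(A, cv, ADD_VALUES));   // fast, repeatable value insertion
*/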
6829 
6830 /*MC
6831    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6832 
6833    Options Database Keys:
6834 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6835 
6836    Level: beginner
6837 
6838    Notes:
6839    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6840     in this case the values associated with the rows and columns one passes in are set to zero
6841     in the matrix.
6842 
6843     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6844     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6845 
6846 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6847 M*/
6848 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6849 {
6850   Mat_MPIAIJ *b;
6851   PetscMPIInt size;
6852 
6853   PetscFunctionBegin;
6854   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6855 
6856   PetscCall(PetscNew(&b));
6857   B->data       = (void *)b;
6858   B->ops[0]     = MatOps_Values;
6859   B->assembled  = PETSC_FALSE;
6860   B->insertmode = NOT_SET_VALUES;
6861   b->size       = size;
6862 
6863   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6864 
6865   /* build cache for off array entries formed */
6866   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6867 
6868   b->donotstash  = PETSC_FALSE;
6869   b->colmap      = NULL;
6870   b->garray      = NULL;
6871   b->roworiented = PETSC_TRUE;
6872 
6873   /* stuff used for matrix vector multiply */
6874   b->lvec  = NULL;
6875   b->Mvctx = NULL;
6876 
6877   /* stuff for MatGetRow() */
6878   b->rowindices   = NULL;
6879   b->rowvalues    = NULL;
6880   b->getrowactive = PETSC_FALSE;
6881 
6882   /* flexible pointer used in CUSPARSE classes */
6883   b->spptr = NULL;
6884 
6885   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6886   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6887   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6892   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6895 #if defined(PETSC_HAVE_CUDA)
6896   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6897 #endif
6898 #if defined(PETSC_HAVE_HIP)
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6900 #endif
6901 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6903 #endif
6904 #if defined(PETSC_HAVE_MKL_SPARSE)
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6906 #endif
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6911 #if defined(PETSC_HAVE_ELEMENTAL)
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6913 #endif
6914 #if defined(PETSC_HAVE_SCALAPACK)
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6916 #endif
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6919 #if defined(PETSC_HAVE_HYPRE)
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6922 #endif
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6927   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6928   PetscFunctionReturn(PETSC_SUCCESS);
6929 }
6930 
6931 /*@C
6932   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6933   and "off-diagonal" part of the matrix in CSR format.
6934 
6935   Collective
6936 
6937   Input Parameters:
6938 + comm - MPI communicator
6939 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6940 . n    - This value should be the same as the local size used in creating the
6941          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have
6942          it calculated if `N` is given). For square matrices `n` is almost always `m`.
6943 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6944 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6945 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6946 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6947 . a    - matrix values
6948 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6949 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6950 - oa   - matrix values
6951 
6952   Output Parameter:
6953 . mat - the matrix
6954 
6955   Level: advanced
6956 
6957   Notes:
6958   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6959   must free the arrays once the matrix has been destroyed and not before.
6960 
6961   The `i` and `j` indices are 0 based
6962 
6963   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6964 
6965   This sets local rows and cannot be used to set off-processor values.
6966 
6967   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6968   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6969   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6970   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6971   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6972   communication if it is known that only local entries will be set.
6973 
6974 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6975           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6976 @*/
6977 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6978 {
6979   Mat_MPIAIJ *maij;
6980 
6981   PetscFunctionBegin;
6982   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6983   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6984   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6985   PetscCall(MatCreate(comm, mat));
6986   PetscCall(MatSetSizes(*mat, m, n, M, N));
6987   PetscCall(MatSetType(*mat, MATMPIAIJ));
6988   maij = (Mat_MPIAIJ *)(*mat)->data;
6989 
6990   (*mat)->preallocated = PETSC_TRUE;
6991 
6992   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6993   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6994 
6995   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6996   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6997 
6998   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6999   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7000   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7001   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7002   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7003   PetscFunctionReturn(PETSC_SUCCESS);
7004 }
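
/* A minimal usage sketch of MatCreateMPIAIJWithSplitArrays() (illustrative values; the rank shown
   owns rows [0,2) and columns [0,2) of a 4x4 matrix):

     PetscInt    di[] = {0, 1, 2}, dj[] = {0, 1}; // diagonal block CSR, local column indices
     PetscScalar da[] = {2.0, 2.0};
     PetscInt    oi[] = {0, 1, 2}, oj[] = {3, 2}; // off-diagonal block CSR, global column indices
     PetscScalar oa[] = {-1.0, -1.0};
     PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, 4, 4, di, dj, da, oi, oj, oa, &A));

   The arrays are not copied, so they must stay valid until A has been destroyed. */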
7005 
7006 typedef struct {
7007   Mat       *mp;    /* intermediate products */
7008   PetscBool *mptmp; /* is the intermediate product temporary ? */
7009   PetscInt   cp;    /* number of intermediate products */
7010 
7011   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7012   PetscInt    *startsj_s, *startsj_r;
7013   PetscScalar *bufa;
7014   Mat          P_oth;
7015 
7016   /* may take advantage of merging product->B */
7017   Mat Bloc; /* B-local by merging diag and off-diag */
7018 
7019   /* cusparse does not support splitting between the symbolic and numeric phases.
7020      When api_user is true, we don't need to update the numerical values
7021      of the temporary storage */
7022   PetscBool reusesym;
7023 
7024   /* support for COO values insertion */
7025   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7026   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7027   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7028   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7029   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7030   PetscMemType mtype;
7031 
7032   /* customization */
7033   PetscBool abmerge;
7034   PetscBool P_oth_bind;
7035 } MatMatMPIAIJBACKEND;
7036 
7037 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7038 {
7039   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7040   PetscInt             i;
7041 
7042   PetscFunctionBegin;
7043   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7044   PetscCall(PetscFree(mmdata->bufa));
7045   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7046   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7047   PetscCall(MatDestroy(&mmdata->P_oth));
7048   PetscCall(MatDestroy(&mmdata->Bloc));
7049   PetscCall(PetscSFDestroy(&mmdata->sf));
7050   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7051   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7052   PetscCall(PetscFree(mmdata->own[0]));
7053   PetscCall(PetscFree(mmdata->own));
7054   PetscCall(PetscFree(mmdata->off[0]));
7055   PetscCall(PetscFree(mmdata->off));
7056   PetscCall(PetscFree(mmdata));
7057   PetscFunctionReturn(PETSC_SUCCESS);
7058 }
7059 
7060 /* Copy n selected entries of A, with indices given in idx[], to v[].
7061    If idx is NULL, copy the whole data array of A to v[]
7062  */
7063 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7064 {
7065   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7066 
7067   PetscFunctionBegin;
7068   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7069   if (f) {
7070     PetscCall((*f)(A, n, idx, v));
7071   } else {
7072     const PetscScalar *vv;
7073 
7074     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7075     if (n && idx) {
7076       PetscScalar    *w  = v;
7077       const PetscInt *oi = idx;
7078       PetscInt        j;
7079 
7080       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7081     } else {
7082       PetscCall(PetscArraycpy(v, vv, n));
7083     }
7084     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7085   }
7086   PetscFunctionReturn(PETSC_SUCCESS);
7087 }
7088 
7089 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7090 {
7091   MatMatMPIAIJBACKEND *mmdata;
7092   PetscInt             i, n_d, n_o;
7093 
7094   PetscFunctionBegin;
7095   MatCheckProduct(C, 1);
7096   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7097   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7098   if (!mmdata->reusesym) { /* update temporary matrices */
7099     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7100     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7101   }
7102   mmdata->reusesym = PETSC_FALSE;
7103 
7104   for (i = 0; i < mmdata->cp; i++) {
7105     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7106     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7107   }
7108   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7109     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7110 
7111     if (mmdata->mptmp[i]) continue;
7112     if (noff) {
7113       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7114 
7115       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7116       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7117       n_o += noff;
7118       n_d += nown;
7119     } else {
7120       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7121 
7122       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7123       n_d += mm->nz;
7124     }
7125   }
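  /* For illustration: coo_w holds values this rank computed for nonzeros owned elsewhere; the
     gather below ships them through mmdata->sf, and the values received from other ranks land
     right after the on-process values in coo_v, so a single MatSetValuesCOO() call fills C. */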
7126   if (mmdata->hasoffproc) { /* offprocess insertion */
7127     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7128     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7129   }
7130   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7131   PetscFunctionReturn(PETSC_SUCCESS);
7132 }
7133 
7134 /* Support for Pt * A, A * P, or Pt * A * P */
7135 #define MAX_NUMBER_INTERMEDIATE 4
7136 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7137 {
7138   Mat_Product           *product = C->product;
7139   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7140   Mat_MPIAIJ            *a, *p;
7141   MatMatMPIAIJBACKEND   *mmdata;
7142   ISLocalToGlobalMapping P_oth_l2g = NULL;
7143   IS                     glob      = NULL;
7144   const char            *prefix;
7145   char                   pprefix[256];
7146   const PetscInt        *globidx, *P_oth_idx;
7147   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7148   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7149   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7150                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7151                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7152   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
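  /* For illustration: with rmapt[i] == 1, local row r of mp[i] contributes to global row base + r of C
     (base is the start of C's row range); with rmapt[i] == 2, it contributes to global row rmapa[i][r],
     e.g. rmapa[i] = p->garray when the rows of mp[i] correspond to off-diagonal columns of P. */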
7153 
7154   MatProductType ptype;
7155   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7156   PetscMPIInt    size;
7157 
7158   PetscFunctionBegin;
7159   MatCheckProduct(C, 1);
7160   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7161   ptype = product->type;
7162   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7163     ptype                                          = MATPRODUCT_AB;
7164     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7165   }
7166   switch (ptype) {
7167   case MATPRODUCT_AB:
7168     A          = product->A;
7169     P          = product->B;
7170     m          = A->rmap->n;
7171     n          = P->cmap->n;
7172     M          = A->rmap->N;
7173     N          = P->cmap->N;
7174     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7175     break;
7176   case MATPRODUCT_AtB:
7177     P          = product->A;
7178     A          = product->B;
7179     m          = P->cmap->n;
7180     n          = A->cmap->n;
7181     M          = P->cmap->N;
7182     N          = A->cmap->N;
7183     hasoffproc = PETSC_TRUE;
7184     break;
7185   case MATPRODUCT_PtAP:
7186     A          = product->A;
7187     P          = product->B;
7188     m          = P->cmap->n;
7189     n          = P->cmap->n;
7190     M          = P->cmap->N;
7191     N          = P->cmap->N;
7192     hasoffproc = PETSC_TRUE;
7193     break;
7194   default:
7195     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7196   }
7197   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7198   if (size == 1) hasoffproc = PETSC_FALSE;
7199 
7200   /* defaults */
7201   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7202     mp[i]    = NULL;
7203     mptmp[i] = PETSC_FALSE;
7204     rmapt[i] = -1;
7205     cmapt[i] = -1;
7206     rmapa[i] = NULL;
7207     cmapa[i] = NULL;
7208   }
7209 
7210   /* customization */
7211   PetscCall(PetscNew(&mmdata));
7212   mmdata->reusesym = product->api_user;
7213   if (ptype == MATPRODUCT_AB) {
7214     if (product->api_user) {
7215       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7216       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7217       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7218       PetscOptionsEnd();
7219     } else {
7220       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7221       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7222       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7223       PetscOptionsEnd();
7224     }
7225   } else if (ptype == MATPRODUCT_PtAP) {
7226     if (product->api_user) {
7227       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7228       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7229       PetscOptionsEnd();
7230     } else {
7231       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7232       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7233       PetscOptionsEnd();
7234     }
7235   }
7236   a = (Mat_MPIAIJ *)A->data;
7237   p = (Mat_MPIAIJ *)P->data;
7238   PetscCall(MatSetSizes(C, m, n, M, N));
7239   PetscCall(PetscLayoutSetUp(C->rmap));
7240   PetscCall(PetscLayoutSetUp(C->cmap));
7241   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7242   PetscCall(MatGetOptionsPrefix(C, &prefix));
7243 
7244   cp = 0;
7245   switch (ptype) {
7246   case MATPRODUCT_AB: /* A * P */
7247     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7248 
7249     /* A_diag * P_local (merged or not) */
7250     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7251       /* P is product->B */
7252       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7253       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7254       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7255       PetscCall(MatProductSetFill(mp[cp], product->fill));
7256       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7257       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7258       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7259       mp[cp]->product->api_user = product->api_user;
7260       PetscCall(MatProductSetFromOptions(mp[cp]));
7261       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7262       PetscCall(ISGetIndices(glob, &globidx));
7263       rmapt[cp] = 1;
7264       cmapt[cp] = 2;
7265       cmapa[cp] = globidx;
7266       mptmp[cp] = PETSC_FALSE;
7267       cp++;
7268     } else { /* A_diag * P_diag and A_diag * P_off */
7269       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7270       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7271       PetscCall(MatProductSetFill(mp[cp], product->fill));
7272       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7273       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7274       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7275       mp[cp]->product->api_user = product->api_user;
7276       PetscCall(MatProductSetFromOptions(mp[cp]));
7277       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7278       rmapt[cp] = 1;
7279       cmapt[cp] = 1;
7280       mptmp[cp] = PETSC_FALSE;
7281       cp++;
7282       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7283       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7284       PetscCall(MatProductSetFill(mp[cp], product->fill));
7285       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7286       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7287       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7288       mp[cp]->product->api_user = product->api_user;
7289       PetscCall(MatProductSetFromOptions(mp[cp]));
7290       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7291       rmapt[cp] = 1;
7292       cmapt[cp] = 2;
7293       cmapa[cp] = p->garray;
7294       mptmp[cp] = PETSC_FALSE;
7295       cp++;
7296     }
7297 
7298     /* A_off * P_other */
7299     if (mmdata->P_oth) {
7300       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7301       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7302       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7303       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7304       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7305       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7306       PetscCall(MatProductSetFill(mp[cp], product->fill));
7307       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7308       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7309       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7310       mp[cp]->product->api_user = product->api_user;
7311       PetscCall(MatProductSetFromOptions(mp[cp]));
7312       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7313       rmapt[cp] = 1;
7314       cmapt[cp] = 2;
7315       cmapa[cp] = P_oth_idx;
7316       mptmp[cp] = PETSC_FALSE;
7317       cp++;
7318     }
7319     break;
7320 
7321   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7322     /* A is product->B */
7323     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7324     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7325       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7326       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7327       PetscCall(MatProductSetFill(mp[cp], product->fill));
7328       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7329       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7330       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7331       mp[cp]->product->api_user = product->api_user;
7332       PetscCall(MatProductSetFromOptions(mp[cp]));
7333       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7334       PetscCall(ISGetIndices(glob, &globidx));
7335       rmapt[cp] = 2;
7336       rmapa[cp] = globidx;
7337       cmapt[cp] = 2;
7338       cmapa[cp] = globidx;
7339       mptmp[cp] = PETSC_FALSE;
7340       cp++;
7341     } else {
7342       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7343       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7344       PetscCall(MatProductSetFill(mp[cp], product->fill));
7345       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7346       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7347       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7348       mp[cp]->product->api_user = product->api_user;
7349       PetscCall(MatProductSetFromOptions(mp[cp]));
7350       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7351       PetscCall(ISGetIndices(glob, &globidx));
7352       rmapt[cp] = 1;
7353       cmapt[cp] = 2;
7354       cmapa[cp] = globidx;
7355       mptmp[cp] = PETSC_FALSE;
7356       cp++;
7357       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7358       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7359       PetscCall(MatProductSetFill(mp[cp], product->fill));
7360       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7361       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7362       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7363       mp[cp]->product->api_user = product->api_user;
7364       PetscCall(MatProductSetFromOptions(mp[cp]));
7365       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7366       rmapt[cp] = 2;
7367       rmapa[cp] = p->garray;
7368       cmapt[cp] = 2;
7369       cmapa[cp] = globidx;
7370       mptmp[cp] = PETSC_FALSE;
7371       cp++;
7372     }
7373     break;
7374   case MATPRODUCT_PtAP:
7375     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7376     /* P is product->B */
7377     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7378     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7379     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7380     PetscCall(MatProductSetFill(mp[cp], product->fill));
7381     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7382     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7383     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7384     mp[cp]->product->api_user = product->api_user;
7385     PetscCall(MatProductSetFromOptions(mp[cp]));
7386     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7387     PetscCall(ISGetIndices(glob, &globidx));
7388     rmapt[cp] = 2;
7389     rmapa[cp] = globidx;
7390     cmapt[cp] = 2;
7391     cmapa[cp] = globidx;
7392     mptmp[cp] = PETSC_FALSE;
7393     cp++;
7394     if (mmdata->P_oth) {
7395       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7396       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7397       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7398       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7399       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7400       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7401       PetscCall(MatProductSetFill(mp[cp], product->fill));
7402       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7403       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7404       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7405       mp[cp]->product->api_user = product->api_user;
7406       PetscCall(MatProductSetFromOptions(mp[cp]));
7407       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7408       mptmp[cp] = PETSC_TRUE;
7409       cp++;
7410       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7411       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7412       PetscCall(MatProductSetFill(mp[cp], product->fill));
7413       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7414       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7415       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7416       mp[cp]->product->api_user = product->api_user;
7417       PetscCall(MatProductSetFromOptions(mp[cp]));
7418       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7419       rmapt[cp] = 2;
7420       rmapa[cp] = globidx;
7421       cmapt[cp] = 2;
7422       cmapa[cp] = P_oth_idx;
7423       mptmp[cp] = PETSC_FALSE;
7424       cp++;
7425     }
7426     break;
7427   default:
7428     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7429   }
7430   /* sanity check */
7431   if (size > 1)
7432     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7433 
7434   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7435   for (i = 0; i < cp; i++) {
7436     mmdata->mp[i]    = mp[i];
7437     mmdata->mptmp[i] = mptmp[i];
7438   }
7439   mmdata->cp             = cp;
7440   C->product->data       = mmdata;
7441   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7442   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7443 
7444   /* memory type */
7445   mmdata->mtype = PETSC_MEMTYPE_HOST;
7446   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7447   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7448   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7449   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7450   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7451   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7452 
7453   /* prepare coo coordinates for values insertion */
7454 
7455   /* count the total nonzeros of the intermediate SeqAIJ matrices
7456     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7457     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be sent to remote procs
7458     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7459   */
7460   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7461     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7462     if (mptmp[cp]) continue;
7463     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to other processes (which might include self) */
7464       const PetscInt *rmap = rmapa[cp];
7465       const PetscInt  mr   = mp[cp]->rmap->n;
7466       const PetscInt  rs   = C->rmap->rstart;
7467       const PetscInt  re   = C->rmap->rend;
7468       const PetscInt *ii   = mm->i;
7469       for (i = 0; i < mr; i++) {
7470         const PetscInt gr = rmap[i];
7471         const PetscInt nz = ii[i + 1] - ii[i];
7472         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7473         else ncoo_oown += nz;                  /* this row is local */
7474       }
7475     } else ncoo_d += mm->nz;
7476   }
7477 
7478   /*
7479     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7480 
7481     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this proc by other procs.
7482 
7483     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7484 
7485     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other procs
7486     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7487     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other procs.
7488 
7489     coo_i/j/v[]: arrays of length ncoo holding the row/col/val of the nonzeros belonging to this proc.
7490     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of local nonzeros, and the remaining part stores the row indices of nonzeros this proc will receive.
7491   */
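  /* A hypothetical example of the layout above: suppose cp = 2, and mp[0] contributes 3
     offproc and 2 locally owned nonzeros while mp[1] contributes 1 offproc and 4 locally
     owned nonzeros. Then
       off[1] - off[0] == 3, off[2] - off[1] == 1  (so ncoo_o    == 4)
       own[1] - own[0] == 2, own[2] - own[1] == 4  (so ncoo_oown == 6)
     and coo_i/j/v[] store the ncoo_d + 6 locally inserted entries first, followed by the
     entries received from other procs. */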
7492   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7493   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7494 
7495   /* gather (i,j) of nonzeros inserted by remote procs */
7496   if (hasoffproc) {
7497     PetscSF  msf;
7498     PetscInt ncoo2, *coo_i2, *coo_j2;
7499 
7500     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7501     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7502     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7503 
7504     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7505       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7506       PetscInt   *idxoff = mmdata->off[cp];
7507       PetscInt   *idxown = mmdata->own[cp];
7508       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7509         const PetscInt *rmap = rmapa[cp];
7510         const PetscInt *cmap = cmapa[cp];
7511         const PetscInt *ii   = mm->i;
7512         PetscInt       *coi  = coo_i + ncoo_o;
7513         PetscInt       *coj  = coo_j + ncoo_o;
7514         const PetscInt  mr   = mp[cp]->rmap->n;
7515         const PetscInt  rs   = C->rmap->rstart;
7516         const PetscInt  re   = C->rmap->rend;
7517         const PetscInt  cs   = C->cmap->rstart;
7518         for (i = 0; i < mr; i++) {
7519           const PetscInt *jj = mm->j + ii[i];
7520           const PetscInt  gr = rmap[i];
7521           const PetscInt  nz = ii[i + 1] - ii[i];
7522           if (gr < rs || gr >= re) { /* this is an offproc row */
7523             for (j = ii[i]; j < ii[i + 1]; j++) {
7524               *coi++    = gr;
7525               *idxoff++ = j;
7526             }
7527             if (!cmapt[cp]) { /* already global */
7528               for (j = 0; j < nz; j++) *coj++ = jj[j];
7529             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7530               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7531             } else { /* type-2, local to global for sparse columns */
7532               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7533             }
7534             ncoo_o += nz;
7535           } else { /* this is a local row */
7536             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7537           }
7538         }
7539       }
7540       mmdata->off[cp + 1] = idxoff;
7541       mmdata->own[cp + 1] = idxown;
7542     }
7543 
7544     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7545     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7546     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7547     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
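    /* Each root of the multi-SF corresponds to one remote entry destined for this proc, so
       ncoo2 counts every incoming (i,j) pair; duplicate coordinates are kept here, since the
       COO assembly (MatSetPreallocationCOO()/MatSetValuesCOO()) sums repeated entries */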
7548     ncoo = ncoo_d + ncoo_oown + ncoo2;
7549     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7550     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7551     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7552     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7553     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7554     PetscCall(PetscFree2(coo_i, coo_j));
7555     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7556     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7557     coo_i = coo_i2;
7558     coo_j = coo_j2;
7559   } else { /* no offproc values insertion */
7560     ncoo = ncoo_d;
7561     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7562 
7563     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7564     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7565     PetscCall(PetscSFSetUp(mmdata->sf));
7566   }
7567   mmdata->hasoffproc = hasoffproc;
7568 
7569   /* gather (i,j) of nonzeros inserted locally */
7570   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7571     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7572     PetscInt       *coi  = coo_i + ncoo_d;
7573     PetscInt       *coj  = coo_j + ncoo_d;
7574     const PetscInt *jj   = mm->j;
7575     const PetscInt *ii   = mm->i;
7576     const PetscInt *cmap = cmapa[cp];
7577     const PetscInt *rmap = rmapa[cp];
7578     const PetscInt  mr   = mp[cp]->rmap->n;
7579     const PetscInt  rs   = C->rmap->rstart;
7580     const PetscInt  re   = C->rmap->rend;
7581     const PetscInt  cs   = C->cmap->rstart;
7582 
7583     if (mptmp[cp]) continue;
7584     if (rmapt[cp] == 1) { /* consecutive rows */
7585       /* fill coo_i */
7586       for (i = 0; i < mr; i++) {
7587         const PetscInt gr = i + rs;
7588         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7589       }
7590       /* fill coo_j */
7591       if (!cmapt[cp]) { /* type-0, already global */
7592         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7593       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7594         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7595       } else {                                            /* type-2, local to global for sparse columns */
7596         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7597       }
7598       ncoo_d += mm->nz;
7599     } else if (rmapt[cp] == 2) { /* sparse rows */
7600       for (i = 0; i < mr; i++) {
7601         const PetscInt *jj = mm->j + ii[i];
7602         const PetscInt  gr = rmap[i];
7603         const PetscInt  nz = ii[i + 1] - ii[i];
7604         if (gr >= rs && gr < re) { /* local rows */
7605           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7606           if (!cmapt[cp]) { /* type-0, already global */
7607             for (j = 0; j < nz; j++) *coj++ = jj[j];
7608           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7609             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7610           } else { /* type-2, local to global for sparse columns */
7611             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7612           }
7613           ncoo_d += nz;
7614         }
7615       }
7616     }
7617   }
7618   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7619   PetscCall(ISDestroy(&glob));
7620   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7621   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7622   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7623   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7624 
7625   /* preallocate with COO data */
7626   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7627   PetscCall(PetscFree2(coo_i, coo_j));
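  /* The numeric phase (MatProductNumeric_MPIAIJBACKEND, installed above) is then expected to
     fill mmdata->coo_v from the intermediate products and insert it with MatSetValuesCOO();
     between numeric calls only the values change, never the symbolic pattern */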
7628   PetscFunctionReturn(PETSC_SUCCESS);
7629 }
7630 
7631 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7632 {
7633   Mat_Product *product = mat->product;
7634 #if defined(PETSC_HAVE_DEVICE)
7635   PetscBool match  = PETSC_FALSE;
7636   PetscBool usecpu = PETSC_FALSE;
7637 #else
7638   PetscBool match = PETSC_TRUE;
7639 #endif
7640 
7641   PetscFunctionBegin;
7642   MatCheckProduct(mat, 1);
7643 #if defined(PETSC_HAVE_DEVICE)
7644   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7645   if (match) { /* we can always fall back to the CPU if requested */
7646     switch (product->type) {
7647     case MATPRODUCT_AB:
7648       if (product->api_user) {
7649         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7650         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7651         PetscOptionsEnd();
7652       } else {
7653         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7654         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7655         PetscOptionsEnd();
7656       }
7657       break;
7658     case MATPRODUCT_AtB:
7659       if (product->api_user) {
7660         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7661         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7662         PetscOptionsEnd();
7663       } else {
7664         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7665         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7666         PetscOptionsEnd();
7667       }
7668       break;
7669     case MATPRODUCT_PtAP:
7670       if (product->api_user) {
7671         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7672         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7673         PetscOptionsEnd();
7674       } else {
7675         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7676         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7677         PetscOptionsEnd();
7678       }
7679       break;
7680     default:
7681       break;
7682     }
7683     match = (PetscBool)!usecpu;
7684   }
7685 #endif
7686   if (match) {
7687     switch (product->type) {
7688     case MATPRODUCT_AB:
7689     case MATPRODUCT_AtB:
7690     case MATPRODUCT_PtAP:
7691       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7692       break;
7693     default:
7694       break;
7695     }
7696   }
7697   /* fall back to MPIAIJ ops */
7698   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7699   PetscFunctionReturn(PETSC_SUCCESS);
7700 }
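
/* A usage sketch: with a device matrix type the backend symbolic/numeric path above is
   selected by default, while the CPU fallback can be forced per product through the
   options queried above, e.g. (hypothetical command lines)

     ./app -mat_type aijcusparse -matmatmult_backend_cpu
     ./app -mat_type aijkokkos -mat_product_algorithm_backend_cpu
*/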
7701 
7702 /*
7703    Produces the set of block column indices of the matrix row, one for each block represented in the original row
7704 
7705    n - (output) the number of block indices in cc[]
7706    cc - (output) the block indices (must be large enough to contain the indices)
7707 */
7708 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7709 {
7710   PetscInt        cnt = -1, nidx, j;
7711   const PetscInt *idx;
7712 
7713   PetscFunctionBegin;
7714   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7715   if (nidx) {
7716     cnt     = 0;
7717     cc[cnt] = idx[0] / bs;
7718     for (j = 1; j < nidx; j++) {
7719       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7720     }
7721   }
7722   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7723   *n = cnt + 1;
7724   PetscFunctionReturn(PETSC_SUCCESS);
7725 }
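
/* A small worked example (hypothetical values): with bs = 2 and a row whose column indices
   are idx[] = {0, 1, 4, 5, 8}, the block indices idx[j] / bs are {0, 0, 2, 2, 4}, so
   MatCollapseRow() sets *n = 3 and cc[] = {0, 2, 4}; an empty row yields *n = 0 */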
7726 
7727 /*
7728     Produces the set of block column indices of the matrix block row, one for each block represented in the original set of rows
7729 
7730     ncollapsed - (output) the number of block indices
7731     collapsed - (output) the block indices; points into one of the workspace arrays w0/w1/w2, each of which must be large enough to contain the indices
7732 */
7733 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7734 {
7735   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7736 
7737   PetscFunctionBegin;
7738   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7739   for (i = start + 1; i < start + bs; i++) {
7740     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7741     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7742     cprevtmp = cprev;
7743     cprev    = merged;
7744     merged   = cprevtmp;
7745   }
7746   *ncollapsed = nprev;
7747   if (collapsed) *collapsed = cprev;
7748   PetscFunctionReturn(PETSC_SUCCESS);
7749 }
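
/* Continuing the sketch above: for bs = 2, if rows start and start + 1 collapse to {0, 2}
   and {1, 2} respectively, PetscMergeIntArray() forms the sorted union {0, 1, 2} and
   *ncollapsed = 3; the cprev/merged pointers are swapped each iteration so the workspaces
   w0/w1/w2 are reused without extra allocation */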
7750 
7751 /*
7752  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7753 
7754  Input Parameters:
7755 + Amat - matrix
7756 . symmetrize - make the result symmetric
7757 . scale - scale with diagonal
     . filter - filter out values below this threshold (negative means no filtering)
     . index_size - size of index[] (0 means use all rows/columns of each block)
     - index - rows/columns within each block to use when computing the block norms
7758 
7759  Output Parameter:
7760 . a_Gmat - output scalar graph with entries >= 0
7761 
7762 */
7763 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7764 {
7765   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7766   MPI_Comm  comm;
7767   Mat       Gmat;
7768   PetscBool ismpiaij, isseqaij;
7769   Mat       a, b, c;
7770   MatType   jtype;
7771 
7772   PetscFunctionBegin;
7773   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7774   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7775   PetscCall(MatGetSize(Amat, &MM, &NN));
7776   PetscCall(MatGetBlockSize(Amat, &bs));
7777   nloc = (Iend - Istart) / bs;
7778 
7779   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7780   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7781   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7782 
7783   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7784   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class can provide a fast
7785      implementation */
7786   if (bs > 1) {
7787     PetscCall(MatGetType(Amat, &jtype));
7788     PetscCall(MatCreate(comm, &Gmat));
7789     PetscCall(MatSetType(Gmat, jtype));
7790     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7791     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7792     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7793       PetscInt  *d_nnz, *o_nnz;
7794       MatScalar *aa, val, *AA;
7795       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7796       if (isseqaij) {
7797         a = Amat;
7798         b = NULL;
7799       } else {
7800         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7801         a             = d->A;
7802         b             = d->B;
7803       }
7804       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7805       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7806       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7807         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7808         const PetscInt *cols1, *cols2;
7809         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7810           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7811           nnz[brow / bs] = nc2 / bs;
7812           if (nc2 % bs) ok = 0;
7813           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7814           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7815             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7816             if (nc1 != nc2) ok = 0;
7817             else {
7818               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7819                 if (cols1[jj] != cols2[jj]) ok = 0;
7820                 if (cols1[jj] % bs != jj % bs) ok = 0;
7821               }
7822             }
7823             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7824           }
7825           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7826           if (!ok) {
7827             PetscCall(PetscFree2(d_nnz, o_nnz));
7828             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7829             goto old_bs;
7830           }
7831         }
7832       }
7833       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7834       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7835       PetscCall(PetscFree2(d_nnz, o_nnz));
7836       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
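      /* Each dense bs x bs block is collapsed to a single scalar entry of Gmat: the sum of
         |PetscRealPart(a_ij)| over the whole block or, when index[] is given, over the listed
         rows/columns only (e.g., as a hypothetical illustration, index[] = {0} with
         index_size = 1 keeps just each block's (0,0) entry) */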
7837       // diag
7838       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7839         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7840         ai               = aseq->i;
7841         n                = ai[brow + 1] - ai[brow];
7842         aj               = aseq->j + ai[brow];
7843         for (int k = 0; k < n; k += bs) {        // block columns
7844           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7845           val        = 0;
7846           if (index_size == 0) {
7847             for (int ii = 0; ii < bs; ii++) { // rows in block
7848               aa = aseq->a + ai[brow + ii] + k;
7849               for (int jj = 0; jj < bs; jj++) {         // columns in block
7850                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7851               }
7852             }
7853           } else {                                       // use (index,index) value if provided
7854             for (int iii = 0; iii < index_size; iii++) { // rows in block
7855               int ii = index[iii];
7856               aa     = aseq->a + ai[brow + ii] + k;
7857               for (int jjj = 0; jjj < index_size; jjj++) { // columns in block
7858                 int jj = index[jjj];
7859                 val += PetscAbs(PetscRealPart(aa[jj]));
7860               }
7861             }
7862           }
7863           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7864           AA[k / bs] = val;
7865         }
7866         grow = Istart / bs + brow / bs;
7867         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7868       }
7869       // off-diag
7870       if (ismpiaij) {
7871         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7872         const PetscScalar *vals;
7873         const PetscInt    *cols, *garray = aij->garray;
7874         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray?");
7875         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7876           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7877           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7878             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7879             AA[k / bs] = 0;
7880             AJ[cidx]   = garray[cols[k]] / bs;
7881           }
7882           nc = ncols / bs;
7883           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7884           if (index_size == 0) {
7885             for (int ii = 0; ii < bs; ii++) { // rows in block
7886               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7887               for (int k = 0; k < ncols; k += bs) {
7888                 for (int jj = 0; jj < bs; jj++) { // cols in block
7889                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7890                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7891                 }
7892               }
7893               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7894             }
7895           } else {                                       // use (index,index) value if provided
7896             for (int iii = 0; iii < index_size; iii++) { // rows in block
7897               int ii = index[iii];
7898               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7899               for (int k = 0; k < ncols; k += bs) {
7900                 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block
7901                   int jj = index[jjj];
7902                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7903                 }
7904               }
7905               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7906             }
7907           }
7908           grow = Istart / bs + brow / bs;
7909           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7910         }
7911       }
7912       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7913       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7914       PetscCall(PetscFree2(AA, AJ));
7915     } else {
7916       const PetscScalar *vals;
7917       const PetscInt    *idx;
7918       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7919     old_bs:
7920       /*
7921        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7922        */
7923       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7924       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7925       if (isseqaij) {
7926         PetscInt max_d_nnz;
7927         /*
7928          Determine exact preallocation count for (sequential) scalar matrix
7929          */
7930         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7931         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7932         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7933         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7934         PetscCall(PetscFree3(w0, w1, w2));
7935       } else if (ismpiaij) {
7936         Mat             Daij, Oaij;
7937         const PetscInt *garray;
7938         PetscInt        max_d_nnz;
7939         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7940         /*
7941          Determine exact preallocation count for diagonal block portion of scalar matrix
7942          */
7943         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7944         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7945         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7946         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7947         PetscCall(PetscFree3(w0, w1, w2));
7948         /*
7949          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7950          */
7951         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7952           o_nnz[jj] = 0;
7953           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7954             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7955             o_nnz[jj] += ncols;
7956             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7957           }
7958           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7959         }
7960       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7961       /* get scalar copy (norms) of matrix */
7962       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7963       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7964       PetscCall(PetscFree2(d_nnz, o_nnz));
7965       for (Ii = Istart; Ii < Iend; Ii++) {
7966         PetscInt dest_row = Ii / bs;
7967         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7968         for (jj = 0; jj < ncols; jj++) {
7969           PetscInt    dest_col = idx[jj] / bs;
7970           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7971           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7972         }
7973         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7974       }
7975       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7976       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7977     }
7978   } else {
7979     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7980     else {
7981       Gmat = Amat;
7982       PetscCall(PetscObjectReference((PetscObject)Gmat));
7983     }
7984     if (isseqaij) {
7985       a = Gmat;
7986       b = NULL;
7987     } else {
7988       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7989       a             = d->A;
7990       b             = d->B;
7991     }
7992     if (filter >= 0 || scale) {
7993       /* take absolute value of each entry */
7994       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7995         MatInfo      info;
7996         PetscScalar *avals;
7997         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7998         PetscCall(MatSeqAIJGetArray(c, &avals));
7999         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8000         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8001       }
8002     }
8003   }
8004   if (symmetrize) {
8005     PetscBool isset, issym;
8006     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8007     if (!isset || !issym) {
8008       Mat matTrans;
8009       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8010       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8011       PetscCall(MatDestroy(&matTrans));
8012     }
8013     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8014   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8015   if (scale) {
8016     /* symmetrically scale Gmat so that all diagonal values become 1 or -1 */
8017     Vec diag;
8018     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8019     PetscCall(MatGetDiagonal(Gmat, diag));
8020     PetscCall(VecReciprocal(diag));
8021     PetscCall(VecSqrtAbs(diag));
8022     PetscCall(MatDiagonalScale(Gmat, diag, diag));
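    /* i.e. Gmat is replaced by D^(-1/2) * Gmat * D^(-1/2) with D = |diag(Gmat)|, so each
       diagonal entry d becomes d / |d| = +1 or -1 */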
8023     PetscCall(VecDestroy(&diag));
8024   }
8025   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8026 
8027   if (filter >= 0) {
8028     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8029     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8030   }
8031   *a_Gmat = Gmat;
8032   PetscFunctionReturn(PETSC_SUCCESS);
8033 }
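
/* A usage sketch (hypothetical caller): build a symmetrized, diagonally scaled scalar
   connectivity graph from a blocked AIJ matrix, dropping entries below 1e-4:

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 1e-4, 0, NULL, &G));
     // ... use G, e.g. as a coarsening graph ...
     PetscCall(MatDestroy(&G));
*/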
8034 
8035 /*
8036     Special version for direct calls from Fortran
8037 */
8038 
8039 /* Change these macros so they can be used in a void function */
8040 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8041 #undef PetscCall
8042 #define PetscCall(...) \
8043   do { \
8044     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8045     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8046       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8047       return; \
8048     } \
8049   } while (0)
8050 
8051 #undef SETERRQ
8052 #define SETERRQ(comm, ierr, ...) \
8053   do { \
8054     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8055     return; \
8056   } while (0)
8057 
8058 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8059   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8060 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8061   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8062 #else
8063 #endif
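/* With the mangling above, a hypothetical Fortran call such as
     call MatSetValuesMPIAIJ(mat, m, im, n, in, v, ADD_VALUES, ierr)
   resolves to MATSETVALUESMPIAIJ, matsetvaluesmpiaij, or matsetvaluesmpiaij_ depending on
   the compiler's naming convention */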
8064 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8065 {
8066   Mat         mat = *mmat;
8067   PetscInt    m = *mm, n = *mn;
8068   InsertMode  addv = *maddv;
8069   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8070   PetscScalar value;
8071 
8072   MatCheckPreallocated(mat, 1);
8073   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8074   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8075   {
8076     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8077     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8078     PetscBool roworiented = aij->roworiented;
8079 
8080     /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
8081     Mat         A     = aij->A;
8082     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8083     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8084     MatScalar  *aa;
8085     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8086     Mat         B                 = aij->B;
8087     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8088     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8089     MatScalar  *ba;
8090     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8091      * cannot use "#if defined" inside a macro. */
8092     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8093 
8094     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8095     PetscInt   nonew = a->nonew;
8096     MatScalar *ap1, *ap2;
8097 
8098     PetscFunctionBegin;
8099     PetscCall(MatSeqAIJGetArray(A, &aa));
8100     PetscCall(MatSeqAIJGetArray(B, &ba));
8101     for (i = 0; i < m; i++) {
8102       if (im[i] < 0) continue;
8103       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8104       if (im[i] >= rstart && im[i] < rend) {
8105         row      = im[i] - rstart;
8106         lastcol1 = -1;
8107         rp1      = aj + ai[row];
8108         ap1      = aa + ai[row];
8109         rmax1    = aimax[row];
8110         nrow1    = ailen[row];
8111         low1     = 0;
8112         high1    = nrow1;
8113         lastcol2 = -1;
8114         rp2      = bj + bi[row];
8115         ap2      = ba + bi[row];
8116         rmax2    = bimax[row];
8117         nrow2    = bilen[row];
8118         low2     = 0;
8119         high2    = nrow2;
8120 
8121         for (j = 0; j < n; j++) {
8122           if (roworiented) value = v[i * n + j];
8123           else value = v[i + j * m];
8124           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8125           if (in[j] >= cstart && in[j] < cend) {
8126             col = in[j] - cstart;
8127             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8128           } else if (in[j] < 0) continue;
8129           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8130             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8131           } else {
8132             if (mat->was_assembled) {
8133               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8134 #if defined(PETSC_USE_CTABLE)
8135               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8136               col--;
8137 #else
8138               col = aij->colmap[in[j]] - 1;
8139 #endif
8140               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8141                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8142                 col = in[j];
8143                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8144                 B        = aij->B;
8145                 b        = (Mat_SeqAIJ *)B->data;
8146                 bimax    = b->imax;
8147                 bi       = b->i;
8148                 bilen    = b->ilen;
8149                 bj       = b->j;
8150                 rp2      = bj + bi[row];
8151                 ap2      = ba + bi[row];
8152                 rmax2    = bimax[row];
8153                 nrow2    = bilen[row];
8154                 low2     = 0;
8155                 high2    = nrow2;
8156                 bm       = aij->B->rmap->n;
8157                 ba       = b->a;
8158                 inserted = PETSC_FALSE;
8159               }
8160             } else col = in[j];
8161             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8162           }
8163         }
8164       } else if (!aij->donotstash) {
8165         if (roworiented) {
8166           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8167         } else {
8168           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8169         }
8170       }
8171     }
8172     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8173     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8174   }
8175   PetscFunctionReturnVoid();
8176 }
8177 
8178 /* Undefining these here since they were redefined from their original definitions above! No
8179  * other PETSc functions should be defined past this point, as it is impossible to recover the
8180  * original definitions */
8181 #undef PetscCall
8182 #undef SETERRQ
8183