xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dc97da86830806a16cfaacfab740baa2e54c8135)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
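  /* Stash the merged local matrix on A so that MatRestoreRowIJ_MPIAIJ() can find it;
     PetscObjectCompose() takes a reference, so the MatDestroy() below does not free B */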
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118    for communicators controlling multiple processes.  It is recommended that you call both of
119    the above preallocation routines for simplicity.
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; this type also
128   automatically switches over to use inodes when enough of them exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
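/*
  Illustrative sketch (not part of this file): creating a `MATAIJ` matrix and calling both
  preallocation routines as recommended above; the global size N and the per-row nonzero
  estimates are placeholders.

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // used when the communicator has one process
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // used otherwise; 2 is the off-diagonal estimate
*/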
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139    for communicators controlling multiple processes.  It is recommended that you call both of
140    the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
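/*
  Illustrative sketch: for a matrix configured with MatSetFromOptions(), the CRL format
  can be selected at runtime from the command line, e.g.

    ./app -mat_type aijcrl
*/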
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287   PetscMPIInt        in;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   PetscCall(PetscMPIIntCast(n, &in));
313   if (type == NORM_INFINITY) {
314     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
315   } else {
316     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
317   }
318   PetscCall(PetscFree(work));
319   if (type == NORM_2) {
320     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
321   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
322     for (i = 0; i < n; i++) reductions[i] /= m;
323   }
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
328 {
329   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
330   IS              sis, gis;
331   const PetscInt *isis, *igis;
332   PetscInt        n, *iis, nsis, ngis, rstart, i;
333 
334   PetscFunctionBegin;
335   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
336   PetscCall(MatFindNonzeroRows(a->B, &gis));
337   PetscCall(ISGetSize(gis, &ngis));
338   PetscCall(ISGetSize(sis, &nsis));
339   PetscCall(ISGetIndices(sis, &isis));
340   PetscCall(ISGetIndices(gis, &igis));
341 
342   PetscCall(PetscMalloc1(ngis + nsis, &iis));
343   PetscCall(PetscArraycpy(iis, igis, ngis));
344   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
345   n = ngis + nsis;
346   PetscCall(PetscSortRemoveDupsInt(&n, iis));
347   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
348   for (i = 0; i < n; i++) iis[i] += rstart;
349   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
350 
351   PetscCall(ISRestoreIndices(sis, &isis));
352   PetscCall(ISRestoreIndices(gis, &igis));
353   PetscCall(ISDestroy(&sis));
354   PetscCall(ISDestroy(&gis));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*
359   Local utility routine that creates a mapping from the global column
360   number to the local number in the off-diagonal part of the local
361   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
362   at a slightly higher hash-table lookup cost; without it, it is not scalable
363   (each process stores an integer array of order N) but is fast to access.
364 */
365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
366 {
367   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
368   PetscInt    n   = aij->B->cmap->n, i;
369 
370   PetscFunctionBegin;
371   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
372 #if defined(PETSC_USE_CTABLE)
373   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
374   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
375 #else
376   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
377   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
378 #endif
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
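/*
  Illustrative sketch: how the map built above is consulted elsewhere in this file
  (e.g. in MatSetValues_MPIAIJ()); the global column id gcol is a placeholder. The +1/-1
  shifts let 0 (the hash-map default, or a calloc'd entry) mean "not a column of B":

#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &col));
    col--;
#else
    col = aij->colmap[gcol] - 1;
#endif
*/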
381 
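/* Insert or add `value` at (row,col) of the diagonal block A, given that rp1/ap1 point at the
   row's column indices/values: a bounded binary search narrows [low1,high1) to at most five
   entries, a linear scan locates the column, and if it is absent (and nonew permits) a new
   nonzero is created by shifting the later entries of the row up by one.
   MatSetValues_SeqAIJ_B_Private() below is the analogous macro for the off-diagonal block B. */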
382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
383   do { \
384     if (col <= lastcol1) low1 = 0; \
385     else high1 = nrow1; \
386     lastcol1 = col; \
387     while (high1 - low1 > 5) { \
388       t = (low1 + high1) / 2; \
389       if (rp1[t] > col) high1 = t; \
390       else low1 = t; \
391     } \
392     for (_i = low1; _i < high1; _i++) { \
393       if (rp1[_i] > col) break; \
394       if (rp1[_i] == col) { \
395         if (addv == ADD_VALUES) { \
396           ap1[_i] += value; \
397           /* Not sure whether LogFlops will slow down the code or not */ \
398           (void)PetscLogFlops(1.0); \
399         } else ap1[_i] = value; \
400         goto a_noinsert; \
401       } \
402     } \
403     if (value == 0.0 && ignorezeroentries && row != col) { \
404       low1  = 0; \
405       high1 = nrow1; \
406       goto a_noinsert; \
407     } \
408     if (nonew == 1) { \
409       low1  = 0; \
410       high1 = nrow1; \
411       goto a_noinsert; \
412     } \
413     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
414     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
415     N = nrow1++ - 1; \
416     a->nz++; \
417     high1++; \
418     /* shift up all the later entries in this row */ \
419     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
420     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
421     rp1[_i] = col; \
422     ap1[_i] = value; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467   b_noinsert:; \
468     bilen[row] = nrow2; \
469   } while (0)
470 
471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
472 {
473   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
474   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
475   PetscInt     l, *garray                         = mat->garray, diag;
476   PetscScalar *aa, *ba;
477 
478   PetscFunctionBegin;
479   /* code only works for square matrices A */
480 
481   /* find size of row to the left of the diagonal part */
482   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
483   row = row - diag;
484   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
485     if (garray[b->j[b->i[row] + l]] > diag) break;
486   }
487   if (l) {
488     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
489     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
490     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
491   }
492 
493   /* diagonal part */
494   if (a->i[row + 1] - a->i[row]) {
495     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
496     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
497     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
498   }
499 
500   /* right of diagonal part */
501   if (b->i[row + 1] - b->i[row] - l) {
502     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
503     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
504     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
505   }
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
512   PetscScalar value = 0.0;
513   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
514   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
515   PetscBool   roworiented = aij->roworiented;
516 
517   /* Some Variables required in the macro */
518   Mat         A     = aij->A;
519   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
520   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
521   PetscBool   ignorezeroentries = a->ignorezeroentries;
522   Mat         B                 = aij->B;
523   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
524   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
525   MatScalar  *aa, *ba;
526   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
527   PetscInt    nonew;
528   MatScalar  *ap1, *ap2;
529 
530   PetscFunctionBegin;
531   PetscCall(MatSeqAIJGetArray(A, &aa));
532   PetscCall(MatSeqAIJGetArray(B, &ba));
533   for (i = 0; i < m; i++) {
534     if (im[i] < 0) continue;
535     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
536     if (im[i] >= rstart && im[i] < rend) {
537       row      = im[i] - rstart;
538       lastcol1 = -1;
539       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
540       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
541       rmax1    = aimax[row];
542       nrow1    = ailen[row];
543       low1     = 0;
544       high1    = nrow1;
545       lastcol2 = -1;
546       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
547       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
548       rmax2    = bimax[row];
549       nrow2    = bilen[row];
550       low2     = 0;
551       high2    = nrow2;
552 
553       for (j = 0; j < n; j++) {
554         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
555         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
556         if (in[j] >= cstart && in[j] < cend) {
557           col   = in[j] - cstart;
558           nonew = a->nonew;
559           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
560         } else if (in[j] < 0) {
561           continue;
562         } else {
563           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
564           if (mat->was_assembled) {
565             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
566 #if defined(PETSC_USE_CTABLE)
567             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
568             col--;
569 #else
570             col = aij->colmap[in[j]] - 1;
571 #endif
572             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
573               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
574               col = in[j];
575               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
576               B     = aij->B;
577               b     = (Mat_SeqAIJ *)B->data;
578               bimax = b->imax;
579               bi    = b->i;
580               bilen = b->ilen;
581               bj    = b->j;
582               ba    = b->a;
583               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
584               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
585               rmax2 = bimax[row];
586               nrow2 = bilen[row];
587               low2  = 0;
588               high2 = nrow2;
589               bm    = aij->B->rmap->n;
590               ba    = b->a;
591             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
592               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
593                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
594               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
595             }
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
599         }
600       }
601     } else {
602       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         } else {
608           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
609         }
610       }
611     }
612   }
613   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
614   PetscCall(MatSeqAIJRestoreArray(B, &ba));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
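/*
  Illustrative sketch: typical caller-side use of this function through MatSetValues();
  rows and columns are global indices, and entries destined for other processes are
  stashed above and communicated during assembly. The indices and values are placeholders.

    PetscInt    row = 0, cols[3] = {0, 1, 2};
    PetscScalar vals[3] = {2.0, -1.0, 0.5};
    PetscCall(MatSetValues(A, 1, &row, 3, cols, vals, ADD_VALUES));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/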
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
620     The values in mat_i have to be sorted, and the values in mat_j have to be sorted for each row (CSR-like).
621     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
660     The values in mat_i have to be sorted, and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
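/*
  Worked example of the split performed above: suppose this process owns columns
  [cstart,cend) = [3,6) and a row of the input CSR has global columns {0, 3, 5, 8} with
  values {a, b, c, d}. The diagonal block receives local columns {0, 2} (3-cstart, 5-cstart)
  with values {b, c}, while the off-diagonal block keeps the global columns {0, 8} with
  values {a, d}; those are translated to compact local ids later, when
  MatSetUpMultiply_MPIAIJ() builds garray.
*/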
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
757   PetscMPIInt  n;
758   PetscInt     i, j, rstart, ncols, flg;
759   PetscInt    *row, *col;
760   PetscBool    other_disassembled;
761   PetscScalar *val;
762 
763   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
764 
765   PetscFunctionBegin;
766   if (!aij->donotstash && !mat->nooffprocentries) {
767     while (1) {
768       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
769       if (!flg) break;
770 
771       for (i = 0; i < n;) {
772         /* Now identify the consecutive vals belonging to the same row */
773         for (j = i, rstart = row[j]; j < n; j++) {
774           if (row[j] != rstart) break;
775         }
776         if (j < n) ncols = j - i;
777         else ncols = n - i;
778         /* Now assemble all these values with a single function call */
779         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
780         i = j;
781       }
782     }
783     PetscCall(MatStashScatterEnd_Private(&mat->stash));
784   }
785 #if defined(PETSC_HAVE_DEVICE)
786   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
787   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
788   if (mat->boundtocpu) {
789     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
790     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
791   }
792 #endif
793   PetscCall(MatAssemblyBegin(aij->A, mode));
794   PetscCall(MatAssemblyEnd(aij->A, mode));
795 
796   /* determine if any process has disassembled; if so, we must
797      also disassemble ourselves so that we may reassemble */
798   /*
799      if nonzero structure of submatrix B cannot change then we know that
800      no processor disassembled thus we can skip this stuff
801   */
802   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
803     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
804     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
805       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
806     }
807   }
808   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
809   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
810 #if defined(PETSC_HAVE_DEVICE)
811   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
812 #endif
813   PetscCall(MatAssemblyBegin(aij->B, mode));
814   PetscCall(MatAssemblyEnd(aij->B, mode));
815 
816   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
817 
818   aij->rowvalues = NULL;
819 
820   PetscCall(VecDestroy(&aij->diag));
821 
822   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
823   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
824     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
825     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
826   }
827 #if defined(PETSC_HAVE_DEVICE)
828   mat->offloadmask = PETSC_OFFLOAD_BOTH;
829 #endif
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
844 {
845   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
846   PetscInt   *lrows;
847   PetscInt    r, len;
848   PetscBool   cong;
849 
850   PetscFunctionBegin;
851   /* get locally owned rows */
852   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
853   PetscCall(MatHasCongruentLayouts(A, &cong));
854   /* fix right-hand side if needed */
855   if (x && b) {
856     const PetscScalar *xx;
857     PetscScalar       *bb;
858 
859     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
860     PetscCall(VecGetArrayRead(x, &xx));
861     PetscCall(VecGetArray(b, &bb));
862     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
863     PetscCall(VecRestoreArrayRead(x, &xx));
864     PetscCall(VecRestoreArray(b, &bb));
865   }
866 
867   if (diag != 0.0 && cong) {
868     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
869     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
870   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
871     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
872     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
873     PetscInt    nnwA, nnwB;
874     PetscBool   nnzA, nnzB;
875 
876     nnwA = aijA->nonew;
877     nnwB = aijB->nonew;
878     nnzA = aijA->keepnonzeropattern;
879     nnzB = aijB->keepnonzeropattern;
880     if (!nnzA) {
881       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
882       aijA->nonew = 0;
883     }
884     if (!nnzB) {
885       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
886       aijB->nonew = 0;
887     }
888     /* Must zero here before the next loop */
889     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
890     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       if (row >= A->cmap->N) continue;
894       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
895     }
896     aijA->nonew = nnwA;
897     aijB->nonew = nnwB;
898   } else {
899     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
900     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
901   }
902   PetscCall(PetscFree(lrows));
903   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
904   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
905 
906   /* only change matrix nonzero state if pattern was allowed to be changed */
907   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
908     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
909     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
910   }
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
915 {
916   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
917   PetscInt           n = A->rmap->n;
918   PetscInt           i, j, r, m, len = 0;
919   PetscInt          *lrows, *owners = A->rmap->range;
920   PetscMPIInt        p = 0;
921   PetscSFNode       *rrows;
922   PetscSF            sf;
923   const PetscScalar *xx;
924   PetscScalar       *bb, *mask, *aij_a;
925   Vec                xmask, lmask;
926   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
927   const PetscInt    *aj, *ii, *ridx;
928   PetscScalar       *aa;
929 
930   PetscFunctionBegin;
931   /* Create SF where leaves are input rows and roots are owned rows */
932   PetscCall(PetscMalloc1(n, &lrows));
933   for (r = 0; r < n; ++r) lrows[r] = -1;
934   PetscCall(PetscMalloc1(N, &rrows));
935   for (r = 0; r < N; ++r) {
936     const PetscInt idx = rows[r];
937     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
938     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
939       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
940     }
941     rrows[r].rank  = p;
942     rrows[r].index = rows[r] - owners[p];
943   }
944   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
945   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
946   /* Collect flags for rows to be zeroed */
947   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
948   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
949   PetscCall(PetscSFDestroy(&sf));
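  /* A root that received at least one leaf was overwritten by the MPI_LOR reduction
     (row indices are nonnegative, so the logical OR evaluates to 0 or 1), while roots
     owning no requested row keep the sentinel -1 set above */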
950   /* Compress and put in row numbers */
951   for (r = 0; r < n; ++r)
952     if (lrows[r] >= 0) lrows[len++] = r;
953   /* zero diagonal part of matrix */
954   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
955   /* handle off-diagonal part of matrix */
956   PetscCall(MatCreateVecs(A, &xmask, NULL));
957   PetscCall(VecDuplicate(l->lvec, &lmask));
958   PetscCall(VecGetArray(xmask, &bb));
959   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
960   PetscCall(VecRestoreArray(xmask, &bb));
961   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
962   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
963   PetscCall(VecDestroy(&xmask));
964   if (x && b) { /* this code is buggy when the row and column layouts don't match */
965     PetscBool cong;
966 
967     PetscCall(MatHasCongruentLayouts(A, &cong));
968     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
969     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
970     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
971     PetscCall(VecGetArrayRead(l->lvec, &xx));
972     PetscCall(VecGetArray(b, &bb));
973   }
974   PetscCall(VecGetArray(lmask, &mask));
975   /* remove zeroed rows of off-diagonal matrix */
976   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
977   ii = aij->i;
978   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
979   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
980   if (aij->compressedrow.use) {
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i = 0; i < m; i++) {
985       n  = ii[i + 1] - ii[i];
986       aj = aij->j + ii[i];
987       aa = aij_a + ii[i];
988 
989       for (j = 0; j < n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           if (b) bb[*ridx] -= *aa * xx[*aj];
992           *aa = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001     for (i = 0; i < m; i++) {
1002       n  = ii[i + 1] - ii[i];
1003       aj = aij->j + ii[i];
1004       aa = aij_a + ii[i];
1005       for (j = 0; j < n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           if (b) bb[i] -= *aa * xx[*aj];
1008           *aa = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015   if (x && b) {
1016     PetscCall(VecRestoreArray(b, &bb));
1017     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1018   }
1019   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1020   PetscCall(VecRestoreArray(lmask, &mask));
1021   PetscCall(VecDestroy(&lmask));
1022   PetscCall(PetscFree(lrows));
1023 
1024   /* only change matrix nonzero state if pattern was allowed to be changed */
1025   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1026     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1027     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1028   }
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
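  /* Overlap communication with computation: start gathering the needed off-process entries
     of xx into a->lvec, apply the diagonal block while the scatter is in flight, then finish
     the scatter and accumulate the off-diagonal contribution, i.e. yy = a->A*xx + a->B*lvec */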
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1086 {
1087   MPI_Comm    comm;
1088   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1089   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1090   IS          Me, Notme;
1091   PetscInt    M, N, first, last, *notme, i;
1092   PetscBool   lf;
1093   PetscMPIInt size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1098   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1099   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1100   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1101   PetscCallMPI(MPI_Comm_size(comm, &size));
1102   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1103 
1104   /* Hard test: off-diagonal block. This requires a MatCreateSubMatrices() call. */
1105   PetscCall(MatGetSize(Amat, &M, &N));
1106   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1107   PetscCall(PetscMalloc1(N - last + first, &notme));
1108   for (i = 0; i < first; i++) notme[i] = i;
1109   for (i = last; i < M; i++) notme[i - last + first] = i;
1110   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1111   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1112   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1113   Aoff = Aoffs[0];
1114   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1115   Boff = Boffs[0];
1116   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1117   PetscCall(MatDestroyMatrices(1, &Aoffs));
1118   PetscCall(MatDestroyMatrices(1, &Boffs));
1119   PetscCall(ISDestroy(&Me));
1120   PetscCall(ISDestroy(&Notme));
1121   PetscCall(PetscFree(notme));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142   diagonal block.
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
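  /* Binary layout written below: a 4-entry header (MAT_FILE_CLASSID, M, N, global nonzero
     count), then every row length in global row order, then all column indices, then all
     numerical values, the last three written collectively across processes */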
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1246 {
1247   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1248   PetscMPIInt       rank = aij->rank, size = aij->size;
1249   PetscBool         isdraw, iascii, isbinary;
1250   PetscViewer       sviewer;
1251   PetscViewerFormat format;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1255   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1256   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1257   if (iascii) {
1258     PetscCall(PetscViewerGetFormat(viewer, &format));
1259     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1260       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1261       PetscCall(PetscMalloc1(size, &nz));
1262       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1263       for (i = 0; i < (PetscInt)size; i++) {
1264         nmax = PetscMax(nmax, nz[i]);
1265         nmin = PetscMin(nmin, nz[i]);
1266         navg += nz[i];
1267       }
1268       PetscCall(PetscFree(nz));
1269       navg = navg / size;
1270       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1271       PetscFunctionReturn(PETSC_SUCCESS);
1272     }
1273     PetscCall(PetscViewerGetFormat(viewer, &format));
1274     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1275       MatInfo   info;
1276       PetscInt *inodes = NULL;
1277 
1278       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1279       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1280       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1281       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1282       if (!inodes) {
1283         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1284                                                      (double)info.memory));
1285       } else {
1286         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1287                                                      (double)info.memory));
1288       }
1289       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1292       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1293       PetscCall(PetscViewerFlush(viewer));
1294       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1295       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1296       PetscCall(VecScatterView(aij->Mvctx, viewer));
1297       PetscFunctionReturn(PETSC_SUCCESS);
1298     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1299       PetscInt inodecount, inodelimit, *inodes;
1300       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1301       if (inodes) {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1303       } else {
1304         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1305       }
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1308       PetscFunctionReturn(PETSC_SUCCESS);
1309     }
1310   } else if (isbinary) {
1311     if (size == 1) {
1312       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1313       PetscCall(MatView(aij->A, viewer));
1314     } else {
1315       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1316     }
1317     PetscFunctionReturn(PETSC_SUCCESS);
1318   } else if (iascii && size == 1) {
1319     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1320     PetscCall(MatView(aij->A, viewer));
1321     PetscFunctionReturn(PETSC_SUCCESS);
1322   } else if (isdraw) {
1323     PetscDraw draw;
1324     PetscBool isnull;
1325     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1326     PetscCall(PetscDrawIsNull(draw, &isnull));
1327     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1328   }
1329 
1330   { /* assemble the entire matrix onto first processor */
1331     Mat A = NULL, Av;
1332     IS  isrow, iscol;
1333 
1334     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1335     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1336     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1337     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1338     /* The commented-out code below uses MatCreateSubMatrices() instead */
1339     /*
1340     Mat *AA, A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1344     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1345     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1346     if (rank == 0) {
1347        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1348        A    = AA[0];
1349        Av   = AA[0];
1350     }
1351     PetscCall(MatDestroySubMatrices(1,&AA));
1352 */
1353     PetscCall(ISDestroy(&iscol));
1354     PetscCall(ISDestroy(&isrow));
1355     /*
1356        Everyone must participate in these viewing calls, since the graphics waits are
1357        synchronized across all processes that share the PetscDraw object
1358     */
1359     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1360     if (rank == 0) {
1361       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1362       PetscCall(MatView_SeqAIJ(Av, sviewer));
1363     }
1364     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1365     PetscCall(MatDestroy(&A));
1366   }
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
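/*
   Parallel SOR here is processor-local (block Jacobi between processes): each
   outer iteration scatters the current x into lvec, forms bb1 = bb - B*x with
   the off-diagonal block B, and then runs lits sweeps of sequential SOR on the
   diagonal block A. A true SOR ordering across processes is not supported and
   is rejected at the end of the routine.
*/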
1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1384 {
1385   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1386   Vec         bb1 = NULL;
1387   PetscBool   hasop;
1388 
1389   PetscFunctionBegin;
1390   if (flag == SOR_APPLY_UPPER) {
1391     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1392     PetscFunctionReturn(PETSC_SUCCESS);
1393   }
1394 
1395   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1396 
1397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1398     if (flag & SOR_ZERO_INITIAL_GUESS) {
1399       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1400       its--;
1401     }
1402 
1403     while (its--) {
1404       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1405       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1406 
1407       /* update rhs: bb1 = bb - B*x */
1408       PetscCall(VecScale(mat->lvec, -1.0));
1409       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1410 
1411       /* local sweep */
1412       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1413     }
1414   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_EISENSTAT) {
1447     Vec xx1;
1448 
1449     PetscCall(VecDuplicate(bb, &xx1));
1450     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1451 
1452     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454     if (!mat->diag) {
1455       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1456       PetscCall(MatGetDiagonal(matin, mat->diag));
1457     }
1458     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1459     if (hasop) {
1460       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1461     } else {
1462       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1463     }
1464     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1465 
1466     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1467 
1468     /* local sweep */
1469     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1470     PetscCall(VecAXPY(xx, 1.0, xx1));
1471     PetscCall(VecDestroy(&xx1));
1472   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1473 
1474   PetscCall(VecDestroy(&bb1));
1475 
1476   matin->factorerrortype = mat->A->factorerrortype;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1481 {
1482   Mat             aA, aB, Aperm;
1483   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1484   PetscScalar    *aa, *ba;
1485   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1486   PetscSF         rowsf, sf;
1487   IS              parcolp = NULL;
1488   PetscBool       done;
1489 
1490   PetscFunctionBegin;
1491   PetscCall(MatGetLocalSize(A, &m, &n));
1492   PetscCall(ISGetIndices(rowp, &rwant));
1493   PetscCall(ISGetIndices(colp, &cwant));
1494   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1495 
1496   /* Invert row permutation to find out where my rows should go */
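  /* rowp lists, for each new global row, the old row that moves there. Reducing
     the leaf values work[i] = rstart + i (the new global positions) onto the
     roots rwant[i] (the old rows) leaves rdest[j] equal to the new global row
     of old local row j on its owning process. */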
1497   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1498   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1499   PetscCall(PetscSFSetFromOptions(rowsf));
1500   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1501   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1502   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1503 
1504   /* Invert column permutation to find out where my columns should go */
1505   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1506   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1507   PetscCall(PetscSFSetFromOptions(sf));
1508   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1509   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1510   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1511   PetscCall(PetscSFDestroy(&sf));
1512 
1513   PetscCall(ISRestoreIndices(rowp, &rwant));
1514   PetscCall(ISRestoreIndices(colp, &cwant));
1515   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1516 
1517   /* Find out where my gcols should go */
1518   PetscCall(MatGetSize(aB, NULL, &ng));
1519   PetscCall(PetscMalloc1(ng, &gcdest));
1520   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1521   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1522   PetscCall(PetscSFSetFromOptions(sf));
1523   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1524   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1525   PetscCall(PetscSFDestroy(&sf));
1526 
1527   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1528   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1529   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1530   for (i = 0; i < m; i++) {
1531     PetscInt    row = rdest[i];
1532     PetscMPIInt rowner;
1533     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1534     for (j = ai[i]; j < ai[i + 1]; j++) {
1535       PetscInt    col = cdest[aj[j]];
1536       PetscMPIInt cowner;
1537       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1538       if (rowner == cowner) dnnz[i]++;
1539       else onnz[i]++;
1540     }
1541     for (j = bi[i]; j < bi[i + 1]; j++) {
1542       PetscInt    col = gcdest[bj[j]];
1543       PetscMPIInt cowner;
1544       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1545       if (rowner == cowner) dnnz[i]++;
1546       else onnz[i]++;
1547     }
1548   }
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1551   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1552   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1553   PetscCall(PetscSFDestroy(&rowsf));
1554 
1555   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1556   PetscCall(MatSeqAIJGetArray(aA, &aa));
1557   PetscCall(MatSeqAIJGetArray(aB, &ba));
1558   for (i = 0; i < m; i++) {
1559     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1560     PetscInt  j0, rowlen;
1561     rowlen = ai[i + 1] - ai[i];
1562     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen can exceed the scratch-array length m, so insert in batches of at most m */
1563       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1564       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1565     }
1566     rowlen = bi[i + 1] - bi[i];
1567     for (j0 = j = 0; j < rowlen; j0 = j) {
1568       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1569       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1570     }
1571   }
1572   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1574   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1575   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1576   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1577   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1578   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1579   PetscCall(PetscFree3(work, rdest, cdest));
1580   PetscCall(PetscFree(gcdest));
1581   if (parcolp) PetscCall(ISDestroy(&colp));
1582   *B = Aperm;
1583   PetscFunctionReturn(PETSC_SUCCESS);
1584 }
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
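/*
   The info of the parallel matrix is the sum of the info of its two local
   blocks; for MAT_GLOBAL_MAX and MAT_GLOBAL_SUM the five additive fields are
   further reduced across the communicator. The factorization fields stay zero
   because no parallel LU/ILU/Cholesky is provided by this class.
*/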
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1649 {
1650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1651 
1652   PetscFunctionBegin;
1653   switch (op) {
1654   case MAT_NEW_NONZERO_LOCATIONS:
1655   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1656   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1657   case MAT_KEEP_NONZERO_PATTERN:
1658   case MAT_NEW_NONZERO_LOCATION_ERR:
1659   case MAT_USE_INODES:
1660   case MAT_IGNORE_ZERO_ENTRIES:
1661   case MAT_FORM_EXPLICIT_TRANSPOSE:
1662     MatCheckPreallocated(A, 1);
1663     PetscCall(MatSetOption(a->A, op, flg));
1664     PetscCall(MatSetOption(a->B, op, flg));
1665     break;
1666   case MAT_ROW_ORIENTED:
1667     MatCheckPreallocated(A, 1);
1668     a->roworiented = flg;
1669 
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_FORCE_DIAGONAL_ENTRIES:
1674   case MAT_SORTED_FULL:
1675     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1676     break;
1677   case MAT_IGNORE_OFF_PROC_ENTRIES:
1678     a->donotstash = flg;
1679     break;
1680   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1681   case MAT_SPD:
1682   case MAT_SYMMETRIC:
1683   case MAT_STRUCTURALLY_SYMMETRIC:
1684   case MAT_HERMITIAN:
1685   case MAT_SYMMETRY_ETERNAL:
1686   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1687   case MAT_SPD_ETERNAL:
1688     /* if the diagonal block is square it inherits some of the properties above */
1689     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1690     break;
1691   case MAT_SUBMAT_SINGLEIS:
1692     A->submat_singleis = flg;
1693     break;
1694   case MAT_STRUCTURE_ONLY:
1695     /* The option is handled directly by MatSetOption() */
1696     break;
1697   default:
1698     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1699   }
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
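/*
   A row of the parallel matrix is merged from the two sorted local rows: first
   the B entries whose global column (cmap[]) lies below cstart, then the A
   entries shifted by cstart, then the remaining B entries; imark below is the
   split point within the B row.
*/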
1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1704 {
1705   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1706   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1707   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1708   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1709   PetscInt    *cmap, *idx_p;
1710 
1711   PetscFunctionBegin;
1712   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1713   mat->getrowactive = PETSC_TRUE;
1714 
1715   if (!mat->rowvalues && (idx || v)) {
1716     /*
1717         allocate enough space to hold information from the longest row.
1718     */
1719     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1720     PetscInt    max = 1, tmp;
1721     for (i = 0; i < matin->rmap->n; i++) {
1722       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1723       if (max < tmp) max = tmp;
1724     }
1725     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1726   }
1727 
1728   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1729   lrow = row - rstart;
1730 
1731   pvA = &vworkA;
1732   pcA = &cworkA;
1733   pvB = &vworkB;
1734   pcB = &cworkB;
1735   if (!v) {
1736     pvA = NULL;
1737     pvB = NULL;
1738   }
1739   if (!idx) {
1740     pcA = NULL;
1741     if (!v) pcB = NULL;
1742   }
1743   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1744   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1745   nztot = nzA + nzB;
1746 
1747   cmap = mat->garray;
1748   if (v || idx) {
1749     if (nztot) {
1750       /* Merge into increasing global column order; the A and B parts are each already sorted */
1751       PetscInt imark = -1;
1752       if (v) {
1753         *v = v_p = mat->rowvalues;
1754         for (i = 0; i < nzB; i++) {
1755           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1756           else break;
1757         }
1758         imark = i;
1759         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1760         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1761       }
1762       if (idx) {
1763         *idx = idx_p = mat->rowindices;
1764         if (imark > -1) {
1765           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1766         } else {
1767           for (i = 0; i < nzB; i++) {
1768             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1769             else break;
1770           }
1771           imark = i;
1772         }
1773         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1774         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1775       }
1776     } else {
1777       if (idx) *idx = NULL;
1778       if (v) *v = NULL;
1779     }
1780   }
1781   *nz = nztot;
1782   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1783   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1784   PetscFunctionReturn(PETSC_SUCCESS);
1785 }
1786 
1787 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1788 {
1789   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1790 
1791   PetscFunctionBegin;
1792   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1793   aij->getrowactive = PETSC_FALSE;
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1798 {
1799   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1800   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1801   PetscInt         i, j, cstart = mat->cmap->rstart;
1802   PetscReal        sum = 0.0;
1803   const MatScalar *v, *amata, *bmata;
1804   PetscMPIInt      iN;
1805 
1806   PetscFunctionBegin;
1807   if (aij->size == 1) {
1808     PetscCall(MatNorm(aij->A, type, norm));
1809   } else {
1810     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1811     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1812     if (type == NORM_FROBENIUS) {
1813       v = amata;
1814       for (i = 0; i < amat->nz; i++) {
1815         sum += PetscRealPart(PetscConj(*v) * (*v));
1816         v++;
1817       }
1818       v = bmata;
1819       for (i = 0; i < bmat->nz; i++) {
1820         sum += PetscRealPart(PetscConj(*v) * (*v));
1821         v++;
1822       }
1823       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1824       *norm = PetscSqrtReal(*norm);
1825       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1826     } else if (type == NORM_1) { /* max column norm */
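      /* Note: the per-column sums are accumulated in work arrays of global
         length N before the reduction, so memory use grows with the global
         number of columns. */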
1827       PetscReal *tmp, *tmp2;
1828       PetscInt  *jj, *garray = aij->garray;
1829       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1830       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1831       *norm = 0.0;
1832       v     = amata;
1833       jj    = amat->j;
1834       for (j = 0; j < amat->nz; j++) {
1835         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1836         v++;
1837       }
1838       v  = bmata;
1839       jj = bmat->j;
1840       for (j = 0; j < bmat->nz; j++) {
1841         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1842         v++;
1843       }
1844       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1845       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1846       for (j = 0; j < mat->cmap->N; j++) {
1847         if (tmp2[j] > *norm) *norm = tmp2[j];
1848       }
1849       PetscCall(PetscFree(tmp));
1850       PetscCall(PetscFree(tmp2));
1851       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1852     } else if (type == NORM_INFINITY) { /* max row norm */
1853       PetscReal ntemp = 0.0;
1854       for (j = 0; j < aij->A->rmap->n; j++) {
1855         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1856         sum = 0.0;
1857         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v);
1859           v++;
1860         }
1861         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1862         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1863           sum += PetscAbsScalar(*v);
1864           v++;
1865         }
1866         if (sum > ntemp) ntemp = sum;
1867       }
1868       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1869       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1870     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1871     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1872     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1873   }
1874   PetscFunctionReturn(PETSC_SUCCESS);
1875 }
1876 
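/*
   Transpose strategy: the diagonal block is transposed purely locally, while
   the entries of the off-diagonal block are routed to their new owners through
   MatSetValues(). Preallocation of the result is computed up front: the column
   counts of A give d_nnz, and the column counts of B are summed onto their
   owning processes through a PetscSF to give o_nnz.
*/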
1877 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1878 {
1879   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1880   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1881   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1882   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1883   Mat              B, A_diag, *B_diag;
1884   const MatScalar *pbv, *bv;
1885 
1886   PetscFunctionBegin;
1887   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1888   ma = A->rmap->n;
1889   na = A->cmap->n;
1890   mb = a->B->rmap->n;
1891   nb = a->B->cmap->n;
1892   ai = Aloc->i;
1893   aj = Aloc->j;
1894   bi = Bloc->i;
1895   bj = Bloc->j;
1896   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1897     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1898     PetscSFNode         *oloc;
1899     PETSC_UNUSED PetscSF sf;
1900 
1901     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1902     /* compute d_nnz for preallocation */
1903     PetscCall(PetscArrayzero(d_nnz, na));
1904     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1905     /* compute local off-diagonal contributions */
1906     PetscCall(PetscArrayzero(g_nnz, nb));
1907     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1908     /* map those to global */
1909     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1910     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1911     PetscCall(PetscSFSetFromOptions(sf));
1912     PetscCall(PetscArrayzero(o_nnz, na));
1913     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1914     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1915     PetscCall(PetscSFDestroy(&sf));
1916 
1917     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1918     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1919     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1920     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1921     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1922     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1923   } else {
1924     B = *matout;
1925     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1926   }
1927 
1928   b           = (Mat_MPIAIJ *)B->data;
1929   A_diag      = a->A;
1930   B_diag      = &b->A;
1931   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1932   A_diag_ncol = A_diag->cmap->N;
1933   B_diag_ilen = sub_B_diag->ilen;
1934   B_diag_i    = sub_B_diag->i;
1935 
1936   /* Set ilen for diagonal of B */
1937   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1938 
1939   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1940      very quickly (i.e., without using MatSetValues) because all writes are local. */
1941   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1942   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1943 
1944   /* copy over the B part */
1945   PetscCall(PetscMalloc1(bi[mb], &cols));
1946   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1947   pbv = bv;
1948   row = A->rmap->rstart;
1949   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1950   cols_tmp = cols;
1951   for (i = 0; i < mb; i++) {
1952     ncol = bi[i + 1] - bi[i];
1953     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1954     row++;
1955     if (pbv) pbv += ncol;
1956     if (cols_tmp) cols_tmp += ncol;
1957   }
1958   PetscCall(PetscFree(cols));
1959   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1960 
1961   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1962   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1963   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1964     *matout = B;
1965   } else {
1966     PetscCall(MatHeaderMerge(A, &B));
1967   }
1968   PetscFunctionReturn(PETSC_SUCCESS);
1969 }
1970 
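/*
   Computes mat <- diag(ll) * mat * diag(rr). The left scaling ll is purely
   local and is applied to the rows of both blocks; the right scaling rr must
   first be gathered into lvec (the ghost column values) before it can be
   applied to the columns of the off-diagonal block.
*/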
1971 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1972 {
1973   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1974   Mat         a = aij->A, b = aij->B;
1975   PetscInt    s1, s2, s3;
1976 
1977   PetscFunctionBegin;
1978   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1979   if (rr) {
1980     PetscCall(VecGetLocalSize(rr, &s1));
1981     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1982     /* Overlap communication with computation. */
1983     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1984   }
1985   if (ll) {
1986     PetscCall(VecGetLocalSize(ll, &s1));
1987     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1988     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1989   }
1990   /* scale the diagonal block */
1991   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1992 
1993   if (rr) {
1994     /* Do a scatter end and then right scale the off-diagonal block */
1995     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1996     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1997   }
1998   PetscFunctionReturn(PETSC_SUCCESS);
1999 }
2000 
2001 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2002 {
2003   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2004 
2005   PetscFunctionBegin;
2006   PetscCall(MatSetUnfactored(a->A));
2007   PetscFunctionReturn(PETSC_SUCCESS);
2008 }
2009 
2010 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2011 {
2012   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2013   Mat         a, b, c, d;
2014   PetscBool   flg;
2015 
2016   PetscFunctionBegin;
2017   a = matA->A;
2018   b = matA->B;
2019   c = matB->A;
2020   d = matB->B;
2021 
2022   PetscCall(MatEqual(a, c, &flg));
2023   if (flg) PetscCall(MatEqual(b, d, &flg));
2024   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
2028 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2029 {
2030   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2031   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2032 
2033   PetscFunctionBegin;
2034   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2035   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2036     /* Because of the column compression in the off-process part of the matrix a->B,
2037        the number of columns in a->B and b->B may differ, hence we cannot call
2038        MatCopy() directly on the two parts. If need be, a copy more efficient than
2039        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2040        and then copying the submatrices */
2041     PetscCall(MatCopy_Basic(A, B, str));
2042   } else {
2043     PetscCall(MatCopy(a->A, b->A, str));
2044     PetscCall(MatCopy(a->B, b->B, str));
2045   }
2046   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2047   PetscFunctionReturn(PETSC_SUCCESS);
2048 }
2049 
2050 /*
2051    Computes the number of nonzeros per row needed for preallocation when X and Y
2052    have different nonzero structure.
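   For example, if a row of X has global columns {0, 3, 5} and the same row of
   Y has {1, 3}, the merged pattern is {0, 1, 3, 5}, so that row needs nnz = 4;
   the shared column 3 is counted only once.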
2053 */
2054 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2055 {
2056   PetscInt i, j, k, nzx, nzy;
2057 
2058   PetscFunctionBegin;
2059   /* Set the number of nonzeros in the new matrix */
2060   for (i = 0; i < m; i++) {
2061     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2062     nzx    = xi[i + 1] - xi[i];
2063     nzy    = yi[i + 1] - yi[i];
2064     nnz[i] = 0;
2065     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2066       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2067       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2068       nnz[i]++;
2069     }
2070     for (; k < nzy; k++) nnz[i]++;
2071   }
2072   PetscFunctionReturn(PETSC_SUCCESS);
2073 }
2074 
2075 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2076 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2077 {
2078   PetscInt    m = Y->rmap->N;
2079   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2080   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2081 
2082   PetscFunctionBegin;
2083   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2084   PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086 
2087 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2088 {
2089   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2090 
2091   PetscFunctionBegin;
2092   if (str == SAME_NONZERO_PATTERN) {
2093     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2094     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2095   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2096     PetscCall(MatAXPY_Basic(Y, a, X, str));
2097   } else {
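    /* different nonzero patterns: build a new matrix preallocated for the union
       pattern, do the basic AXPY into it, and then replace the internals of Y
       with it via MatHeaderMerge() */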
2098     Mat       B;
2099     PetscInt *nnz_d, *nnz_o;
2100 
2101     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2102     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2103     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2104     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2105     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2106     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2107     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2108     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2109     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2110     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2111     PetscCall(MatHeaderMerge(Y, &B));
2112     PetscCall(PetscFree(nnz_d));
2113     PetscCall(PetscFree(nnz_o));
2114   }
2115   PetscFunctionReturn(PETSC_SUCCESS);
2116 }
2117 
2118 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2119 
2120 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2121 {
2122   PetscFunctionBegin;
2123   if (PetscDefined(USE_COMPLEX)) {
2124     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2125 
2126     PetscCall(MatConjugate_SeqAIJ(aij->A));
2127     PetscCall(MatConjugate_SeqAIJ(aij->B));
2128   }
2129   PetscFunctionReturn(PETSC_SUCCESS);
2130 }
2131 
2132 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2133 {
2134   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2135 
2136   PetscFunctionBegin;
2137   PetscCall(MatRealPart(a->A));
2138   PetscCall(MatRealPart(a->B));
2139   PetscFunctionReturn(PETSC_SUCCESS);
2140 }
2141 
2142 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2143 {
2144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2145 
2146   PetscFunctionBegin;
2147   PetscCall(MatImaginaryPart(a->A));
2148   PetscCall(MatImaginaryPart(a->B));
2149   PetscFunctionReturn(PETSC_SUCCESS);
2150 }
2151 
2152 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2153 {
2154   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2155   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2156   PetscScalar       *vv;
2157   Vec                vB, vA;
2158   const PetscScalar *va, *vb;
2159 
2160   PetscFunctionBegin;
2161   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2162   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2163 
2164   PetscCall(VecGetArrayRead(vA, &va));
2165   if (idx) {
2166     for (i = 0; i < m; i++) {
2167       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2168     }
2169   }
2170 
2171   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2172   PetscCall(PetscMalloc1(m, &idxb));
2173   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2174 
2175   PetscCall(VecGetArrayWrite(v, &vv));
2176   PetscCall(VecGetArrayRead(vB, &vb));
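  /* combine: keep the larger magnitude of the diagonal- and off-diagonal-block
     row maxima; on a tie the smaller global column index is reported */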
2177   for (i = 0; i < m; i++) {
2178     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2179       vv[i] = vb[i];
2180       if (idx) idx[i] = a->garray[idxb[i]];
2181     } else {
2182       vv[i] = va[i];
2183       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2184     }
2185   }
2186   PetscCall(VecRestoreArrayWrite(v, &vv));
2187   PetscCall(VecRestoreArrayRead(vA, &va));
2188   PetscCall(VecRestoreArrayRead(vB, &vb));
2189   PetscCall(PetscFree(idxb));
2190   PetscCall(VecDestroy(&vA));
2191   PetscCall(VecDestroy(&vB));
2192   PetscFunctionReturn(PETSC_SUCCESS);
2193 }
2194 
2195 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2196 {
2197   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2198   Vec         vB, vA;
2199 
2200   PetscFunctionBegin;
2201   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2202   PetscCall(MatGetRowSumAbs(a->A, vA));
2203   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2204   PetscCall(MatGetRowSumAbs(a->B, vB));
2205   PetscCall(VecAXPY(vA, 1.0, vB));
2206   PetscCall(VecDestroy(&vB));
2207   PetscCall(VecCopy(vA, v));
2208   PetscCall(VecDestroy(&vA));
2209   PetscFunctionReturn(PETSC_SUCCESS);
2210 }
2211 
2212 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2213 {
2214   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2215   PetscInt           m = A->rmap->n, n = A->cmap->n;
2216   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2217   PetscInt          *cmap = mat->garray;
2218   PetscInt          *diagIdx, *offdiagIdx;
2219   Vec                diagV, offdiagV;
2220   PetscScalar       *a, *diagA, *offdiagA;
2221   const PetscScalar *ba, *bav;
2222   PetscInt           r, j, col, ncols, *bi, *bj;
2223   Mat                B = mat->B;
2224   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2225 
2226   PetscFunctionBegin;
2227   /* When one process holds the entire matrix and the others have no entries */
2228   if (A->cmap->N == n) {
2229     PetscCall(VecGetArrayWrite(v, &diagA));
2230     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2231     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2232     PetscCall(VecDestroy(&diagV));
2233     PetscCall(VecRestoreArrayWrite(v, &diagA));
2234     PetscFunctionReturn(PETSC_SUCCESS);
2235   } else if (n == 0) {
2236     if (m) {
2237       PetscCall(VecGetArrayWrite(v, &a));
2238       for (r = 0; r < m; r++) {
2239         a[r] = 0.0;
2240         if (idx) idx[r] = -1;
2241       }
2242       PetscCall(VecRestoreArrayWrite(v, &a));
2243     }
2244     PetscFunctionReturn(PETSC_SUCCESS);
2245   }
2246 
2247   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2249   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2250   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2251 
2252   /* Get offdiagIdx[] for implicit 0.0 */
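  /* B stores only explicit nonzeros, so a row whose off-diagonal part is not
     dense contains implicit 0.0 entries; the smallest absolute value off the
     diagonal block is then 0.0, and offdiagIdx[] must report the global column
     of the first such implicit zero. */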
2253   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2254   ba = bav;
2255   bi = b->i;
2256   bj = b->j;
2257   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2258   for (r = 0; r < m; r++) {
2259     ncols = bi[r + 1] - bi[r];
2260     if (ncols == A->cmap->N - n) { /* Brow is dense */
2261       offdiagA[r]   = *ba;
2262       offdiagIdx[r] = cmap[0];
2263     } else { /* Brow is sparse, so we already know the minimum absolute value is 0.0 at some implicit zero */
2264       offdiagA[r] = 0.0;
2265 
2266       /* Find first hole in the cmap */
2267       for (j = 0; j < ncols; j++) {
2268         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2269         if (col > j && j < cstart) {
2270           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2271           break;
2272         } else if (col > j + n && j >= cstart) {
2273           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2274           break;
2275         }
2276       }
2277       if (j == ncols && ncols < A->cmap->N - n) {
2278         /* a hole is outside compressed Bcols */
2279         if (ncols == 0) {
2280           if (cstart) {
2281             offdiagIdx[r] = 0;
2282           } else offdiagIdx[r] = cend;
2283         } else { /* ncols > 0 */
2284           offdiagIdx[r] = cmap[ncols - 1] + 1;
2285           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2286         }
2287       }
2288     }
2289 
2290     for (j = 0; j < ncols; j++) {
2291       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2292         offdiagA[r]   = *ba;
2293         offdiagIdx[r] = cmap[*bj];
2294       }
2295       ba++;
2296       bj++;
2297     }
2298   }
2299 
2300   PetscCall(VecGetArrayWrite(v, &a));
2301   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2302   for (r = 0; r < m; ++r) {
2303     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2304       a[r] = diagA[r];
2305       if (idx) idx[r] = cstart + diagIdx[r];
2306     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2307       a[r] = diagA[r];
2308       if (idx) {
2309         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2310           idx[r] = cstart + diagIdx[r];
2311         } else idx[r] = offdiagIdx[r];
2312       }
2313     } else {
2314       a[r] = offdiagA[r];
2315       if (idx) idx[r] = offdiagIdx[r];
2316     }
2317   }
2318   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2319   PetscCall(VecRestoreArrayWrite(v, &a));
2320   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2321   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2322   PetscCall(VecDestroy(&diagV));
2323   PetscCall(VecDestroy(&offdiagV));
2324   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2325   PetscFunctionReturn(PETSC_SUCCESS);
2326 }
2327 
2328 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2329 {
2330   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2331   PetscInt           m = A->rmap->n, n = A->cmap->n;
2332   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2333   PetscInt          *cmap = mat->garray;
2334   PetscInt          *diagIdx, *offdiagIdx;
2335   Vec                diagV, offdiagV;
2336   PetscScalar       *a, *diagA, *offdiagA;
2337   const PetscScalar *ba, *bav;
2338   PetscInt           r, j, col, ncols, *bi, *bj;
2339   Mat                B = mat->B;
2340   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2341 
2342   PetscFunctionBegin;
2343   /* When one process holds the entire matrix and the others have no entries */
2344   if (A->cmap->N == n) {
2345     PetscCall(VecGetArrayWrite(v, &diagA));
2346     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2347     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2348     PetscCall(VecDestroy(&diagV));
2349     PetscCall(VecRestoreArrayWrite(v, &diagA));
2350     PetscFunctionReturn(PETSC_SUCCESS);
2351   } else if (n == 0) {
2352     if (m) {
2353       PetscCall(VecGetArrayWrite(v, &a));
2354       for (r = 0; r < m; r++) {
2355         a[r] = PETSC_MAX_REAL;
2356         if (idx) idx[r] = -1;
2357       }
2358       PetscCall(VecRestoreArrayWrite(v, &a));
2359     }
2360     PetscFunctionReturn(PETSC_SUCCESS);
2361   }
2362 
2363   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2364   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2365   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2366   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2367 
2368   /* Get offdiagIdx[] for implicit 0.0 */
2369   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2370   ba = bav;
2371   bi = b->i;
2372   bj = b->j;
2373   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2374   for (r = 0; r < m; r++) {
2375     ncols = bi[r + 1] - bi[r];
2376     if (ncols == A->cmap->N - n) { /* Brow is dense */
2377       offdiagA[r]   = *ba;
2378       offdiagIdx[r] = cmap[0];
2379     } else { /* Brow is sparse, so we already know the minimum is 0.0 or lower (implicit zeros) */
2380       offdiagA[r] = 0.0;
2381 
2382       /* Find first hole in the cmap */
2383       for (j = 0; j < ncols; j++) {
2384         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2385         if (col > j && j < cstart) {
2386           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2387           break;
2388         } else if (col > j + n && j >= cstart) {
2389           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2390           break;
2391         }
2392       }
2393       if (j == ncols && ncols < A->cmap->N - n) {
2394         /* a hole is outside compressed Bcols */
2395         if (ncols == 0) {
2396           if (cstart) {
2397             offdiagIdx[r] = 0;
2398           } else offdiagIdx[r] = cend;
2399         } else { /* ncols > 0 */
2400           offdiagIdx[r] = cmap[ncols - 1] + 1;
2401           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2402         }
2403       }
2404     }
2405 
2406     for (j = 0; j < ncols; j++) {
2407       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2408         offdiagA[r]   = *ba;
2409         offdiagIdx[r] = cmap[*bj];
2410       }
2411       ba++;
2412       bj++;
2413     }
2414   }
2415 
2416   PetscCall(VecGetArrayWrite(v, &a));
2417   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2418   for (r = 0; r < m; ++r) {
2419     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2420       a[r] = diagA[r];
2421       if (idx) idx[r] = cstart + diagIdx[r];
2422     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2423       a[r] = diagA[r];
2424       if (idx) {
2425         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2426           idx[r] = cstart + diagIdx[r];
2427         } else idx[r] = offdiagIdx[r];
2428       }
2429     } else {
2430       a[r] = offdiagA[r];
2431       if (idx) idx[r] = offdiagIdx[r];
2432     }
2433   }
2434   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2435   PetscCall(VecRestoreArrayWrite(v, &a));
2436   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2437   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2438   PetscCall(VecDestroy(&diagV));
2439   PetscCall(VecDestroy(&offdiagV));
2440   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2441   PetscFunctionReturn(PETSC_SUCCESS);
2442 }
2443 
2444 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2445 {
2446   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2447   PetscInt           m = A->rmap->n, n = A->cmap->n;
2448   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2449   PetscInt          *cmap = mat->garray;
2450   PetscInt          *diagIdx, *offdiagIdx;
2451   Vec                diagV, offdiagV;
2452   PetscScalar       *a, *diagA, *offdiagA;
2453   const PetscScalar *ba, *bav;
2454   PetscInt           r, j, col, ncols, *bi, *bj;
2455   Mat                B = mat->B;
2456   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2457 
2458   PetscFunctionBegin;
2459   /* When one process holds the entire matrix and the others have no entries */
2460   if (A->cmap->N == n) {
2461     PetscCall(VecGetArrayWrite(v, &diagA));
2462     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2463     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2464     PetscCall(VecDestroy(&diagV));
2465     PetscCall(VecRestoreArrayWrite(v, &diagA));
2466     PetscFunctionReturn(PETSC_SUCCESS);
2467   } else if (n == 0) {
2468     if (m) {
2469       PetscCall(VecGetArrayWrite(v, &a));
2470       for (r = 0; r < m; r++) {
2471         a[r] = PETSC_MIN_REAL;
2472         if (idx) idx[r] = -1;
2473       }
2474       PetscCall(VecRestoreArrayWrite(v, &a));
2475     }
2476     PetscFunctionReturn(PETSC_SUCCESS);
2477   }
2478 
2479   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2480   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2481   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2482   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2483 
2484   /* Get offdiagIdx[] for implicit 0.0 */
2485   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2486   ba = bav;
2487   bi = b->i;
2488   bj = b->j;
2489   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2490   for (r = 0; r < m; r++) {
2491     ncols = bi[r + 1] - bi[r];
2492     if (ncols == A->cmap->N - n) { /* Brow is dense */
2493       offdiagA[r]   = *ba;
2494       offdiagIdx[r] = cmap[0];
2495     } else { /* Brow is sparse, so we already know the maximum is 0.0 or higher (implicit zeros) */
2496       offdiagA[r] = 0.0;
2497 
2498       /* Find first hole in the cmap */
2499       for (j = 0; j < ncols; j++) {
2500         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2501         if (col > j && j < cstart) {
2502           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2503           break;
2504         } else if (col > j + n && j >= cstart) {
2505           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2506           break;
2507         }
2508       }
2509       if (j == ncols && ncols < A->cmap->N - n) {
2510         /* a hole is outside compressed Bcols */
2511         if (ncols == 0) {
2512           if (cstart) {
2513             offdiagIdx[r] = 0;
2514           } else offdiagIdx[r] = cend;
2515         } else { /* ncols > 0 */
2516           offdiagIdx[r] = cmap[ncols - 1] + 1;
2517           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2518         }
2519       }
2520     }
2521 
2522     for (j = 0; j < ncols; j++) {
2523       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2524         offdiagA[r]   = *ba;
2525         offdiagIdx[r] = cmap[*bj];
2526       }
2527       ba++;
2528       bj++;
2529     }
2530   }
2531 
2532   PetscCall(VecGetArrayWrite(v, &a));
2533   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2534   for (r = 0; r < m; ++r) {
2535     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2536       a[r] = diagA[r];
2537       if (idx) idx[r] = cstart + diagIdx[r];
2538     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2539       a[r] = diagA[r];
2540       if (idx) {
2541         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2542           idx[r] = cstart + diagIdx[r];
2543         } else idx[r] = offdiagIdx[r];
2544       }
2545     } else {
2546       a[r] = offdiagA[r];
2547       if (idx) idx[r] = offdiagIdx[r];
2548     }
2549   }
2550   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2551   PetscCall(VecRestoreArrayWrite(v, &a));
2552   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2553   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2554   PetscCall(VecDestroy(&diagV));
2555   PetscCall(VecDestroy(&offdiagV));
2556   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2557   PetscFunctionReturn(PETSC_SUCCESS);
2558 }
2559 
2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2561 {
2562   Mat *dummy;
2563 
2564   PetscFunctionBegin;
2565   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2566   *newmat = *dummy;
2567   PetscCall(PetscFree(dummy));
2568   PetscFunctionReturn(PETSC_SUCCESS);
2569 }
2570 
2571 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2572 {
2573   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2574 
2575   PetscFunctionBegin;
2576   PetscCall(MatInvertBlockDiagonal(a->A, values));
2577   A->factorerrortype = a->A->factorerrortype;
2578   PetscFunctionReturn(PETSC_SUCCESS);
2579 }
2580 
2581 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2582 {
2583   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2584 
2585   PetscFunctionBegin;
2586   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2587   PetscCall(MatSetRandom(aij->A, rctx));
2588   if (x->assembled) {
2589     PetscCall(MatSetRandom(aij->B, rctx));
2590   } else {
2591     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2592   }
2593   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2594   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
2597 
2598 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2599 {
2600   PetscFunctionBegin;
2601   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2602   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2603   PetscFunctionReturn(PETSC_SUCCESS);
2604 }
2605 
2606 /*@
2607   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2608 
2609   Not Collective
2610 
2611   Input Parameter:
2612 . A - the matrix
2613 
2614   Output Parameter:
2615 . nz - the number of nonzeros
2616 
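  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ`; the count is cast for printing.
.vb
  PetscCount nz;

  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %" PetscInt64_FMT "\n", (PetscInt64)nz));
.ve
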
2617   Level: advanced
2618 
2619 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2620 @*/
2621 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2622 {
2623   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2624   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2625   PetscBool   isaij;
2626 
2627   PetscFunctionBegin;
2628   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2629   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2630   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2631   PetscFunctionReturn(PETSC_SUCCESS);
2632 }
2633 
2634 /*@
2635   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2636 
2637   Collective
2638 
2639   Input Parameters:
2640 + A  - the matrix
2641 - sc - `PETSC_TRUE` indicates that the scalable algorithm should be used (the default is not to use it)
2642 
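  Example Usage:
  A minimal sketch, assuming `is` and `ov` were set up elsewhere; the same switch is exposed
  through the -mat_increase_overlap_scalable option registered in MatSetFromOptions_MPIAIJ().
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
  PetscCall(MatIncreaseOverlap(A, n, is, ov)); /* now dispatches to the scalable algorithm */
.ve
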
2643   Level: advanced
2644 
2645 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2646 @*/
2647 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2648 {
2649   PetscFunctionBegin;
2650   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2651   PetscFunctionReturn(PETSC_SUCCESS);
2652 }
2653 
2654 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2655 {
2656   PetscBool sc = PETSC_FALSE, flg;
2657 
2658   PetscFunctionBegin;
2659   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2660   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2661   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2662   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2663   PetscOptionsHeadEnd();
2664   PetscFunctionReturn(PETSC_SUCCESS);
2665 }
2666 
2667 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2668 {
2669   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2670   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2671 
2672   PetscFunctionBegin;
2673   if (!Y->preallocated) {
2674     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2675   } else if (!aij->nz) { /* It does not matter if the diagonal of Y only partially lies in maij->A; we just need an estimated preallocation. */
2676     PetscInt nonew = aij->nonew;
2677     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2678     aij->nonew = nonew;
2679   }
2680   PetscCall(MatShift_Basic(Y, a));
2681   PetscFunctionReturn(PETSC_SUCCESS);
2682 }
2683 
2684 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2685 {
2686   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2687 
2688   PetscFunctionBegin;
2689   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2690   PetscCall(MatMissingDiagonal(a->A, missing, d));
2691   if (d) {
2692     PetscInt rstart;
2693     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2694     *d += rstart;
2695   }
2696   PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698 
2699 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2700 {
2701   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2702 
2703   PetscFunctionBegin;
2704   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2705   PetscFunctionReturn(PETSC_SUCCESS);
2706 }
2707 
2708 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2709 {
2710   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2711 
2712   PetscFunctionBegin;
2713   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2714   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2715   PetscFunctionReturn(PETSC_SUCCESS);
2716 }
2717 
2718 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2719                                        MatGetRow_MPIAIJ,
2720                                        MatRestoreRow_MPIAIJ,
2721                                        MatMult_MPIAIJ,
2722                                        /* 4*/ MatMultAdd_MPIAIJ,
2723                                        MatMultTranspose_MPIAIJ,
2724                                        MatMultTransposeAdd_MPIAIJ,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                        /*10*/ NULL,
2729                                        NULL,
2730                                        NULL,
2731                                        MatSOR_MPIAIJ,
2732                                        MatTranspose_MPIAIJ,
2733                                        /*15*/ MatGetInfo_MPIAIJ,
2734                                        MatEqual_MPIAIJ,
2735                                        MatGetDiagonal_MPIAIJ,
2736                                        MatDiagonalScale_MPIAIJ,
2737                                        MatNorm_MPIAIJ,
2738                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2739                                        MatAssemblyEnd_MPIAIJ,
2740                                        MatSetOption_MPIAIJ,
2741                                        MatZeroEntries_MPIAIJ,
2742                                        /*24*/ MatZeroRows_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        /*29*/ MatSetUp_MPI_Hash,
2748                                        NULL,
2749                                        NULL,
2750                                        MatGetDiagonalBlock_MPIAIJ,
2751                                        NULL,
2752                                        /*34*/ MatDuplicate_MPIAIJ,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        /*39*/ MatAXPY_MPIAIJ,
2758                                        MatCreateSubMatrices_MPIAIJ,
2759                                        MatIncreaseOverlap_MPIAIJ,
2760                                        MatGetValues_MPIAIJ,
2761                                        MatCopy_MPIAIJ,
2762                                        /*44*/ MatGetRowMax_MPIAIJ,
2763                                        MatScale_MPIAIJ,
2764                                        MatShift_MPIAIJ,
2765                                        MatDiagonalSet_MPIAIJ,
2766                                        MatZeroRowsColumns_MPIAIJ,
2767                                        /*49*/ MatSetRandom_MPIAIJ,
2768                                        MatGetRowIJ_MPIAIJ,
2769                                        MatRestoreRowIJ_MPIAIJ,
2770                                        NULL,
2771                                        NULL,
2772                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2773                                        NULL,
2774                                        MatSetUnfactored_MPIAIJ,
2775                                        MatPermute_MPIAIJ,
2776                                        NULL,
2777                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2778                                        MatDestroy_MPIAIJ,
2779                                        MatView_MPIAIJ,
2780                                        NULL,
2781                                        NULL,
2782                                        /*64*/ NULL,
2783                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2788                                        MatGetRowMinAbs_MPIAIJ,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*75*/ MatFDColoringApply_AIJ,
2794                                        MatSetFromOptions_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        MatFindZeroDiagonals_MPIAIJ,
2798                                        /*80*/ NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        /*83*/ MatLoad_MPIAIJ,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*89*/ NULL,
2808                                        NULL,
2809                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        NULL,
2816                                        MatBindToCPU_MPIAIJ,
2817                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        MatConjugate_MPIAIJ,
2821                                        NULL,
2822                                        /*104*/ MatSetValuesRow_MPIAIJ,
2823                                        MatRealPart_MPIAIJ,
2824                                        MatImaginaryPart_MPIAIJ,
2825                                        NULL,
2826                                        NULL,
2827                                        /*109*/ NULL,
2828                                        NULL,
2829                                        MatGetRowMin_MPIAIJ,
2830                                        NULL,
2831                                        MatMissingDiagonal_MPIAIJ,
2832                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2833                                        NULL,
2834                                        MatGetGhosts_MPIAIJ,
2835                                        NULL,
2836                                        NULL,
2837                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2838                                        NULL,
2839                                        NULL,
2840                                        NULL,
2841                                        MatGetMultiProcBlock_MPIAIJ,
2842                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2843                                        MatGetColumnReductions_MPIAIJ,
2844                                        MatInvertBlockDiagonal_MPIAIJ,
2845                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2846                                        MatCreateSubMatricesMPI_MPIAIJ,
2847                                        /*129*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2851                                        NULL,
2852                                        /*134*/ NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        NULL,
2857                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2858                                        NULL,
2859                                        NULL,
2860                                        MatFDColoringSetUp_MPIXAIJ,
2861                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2862                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2863                                        /*145*/ NULL,
2864                                        NULL,
2865                                        NULL,
2866                                        MatCreateGraph_Simple_AIJ,
2867                                        NULL,
2868                                        /*150*/ NULL,
2869                                        MatEliminateZeros_MPIAIJ,
2870                                        MatGetRowSumAbs_MPIAIJ,
2871                                        NULL,
2872                                        NULL,
2873                                        NULL};
2874 
2875 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2876 {
2877   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2878 
2879   PetscFunctionBegin;
2880   PetscCall(MatStoreValues(aij->A));
2881   PetscCall(MatStoreValues(aij->B));
2882   PetscFunctionReturn(PETSC_SUCCESS);
2883 }
2884 
2885 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2886 {
2887   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2888 
2889   PetscFunctionBegin;
2890   PetscCall(MatRetrieveValues(aij->A));
2891   PetscCall(MatRetrieveValues(aij->B));
2892   PetscFunctionReturn(PETSC_SUCCESS);
2893 }
2894 
2895 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2896 {
2897   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2898   PetscMPIInt size;
2899 
2900   PetscFunctionBegin;
2901   if (B->hash_active) {
2902     B->ops[0]      = b->cops;
2903     B->hash_active = PETSC_FALSE;
2904   }
2905   PetscCall(PetscLayoutSetUp(B->rmap));
2906   PetscCall(PetscLayoutSetUp(B->cmap));
2907 
2908 #if defined(PETSC_USE_CTABLE)
2909   PetscCall(PetscHMapIDestroy(&b->colmap));
2910 #else
2911   PetscCall(PetscFree(b->colmap));
2912 #endif
2913   PetscCall(PetscFree(b->garray));
2914   PetscCall(VecDestroy(&b->lvec));
2915   PetscCall(VecScatterDestroy(&b->Mvctx));
2916 
2917   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2918 
2919   MatSeqXAIJGetOptions_Private(b->B);
2920   PetscCall(MatDestroy(&b->B));
2921   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2922   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2923   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2924   PetscCall(MatSetType(b->B, MATSEQAIJ));
2925   MatSeqXAIJRestoreOptions_Private(b->B);
2926 
2927   MatSeqXAIJGetOptions_Private(b->A);
2928   PetscCall(MatDestroy(&b->A));
2929   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2930   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2931   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2932   PetscCall(MatSetType(b->A, MATSEQAIJ));
2933   MatSeqXAIJRestoreOptions_Private(b->A);
2934 
2935   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2936   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2937   B->preallocated  = PETSC_TRUE;
2938   B->was_assembled = PETSC_FALSE;
2939   B->assembled     = PETSC_FALSE;
2940   PetscFunctionReturn(PETSC_SUCCESS);
2941 }
2942 
2943 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2944 {
2945   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2946   /* Save the nonzero states of the component matrices because they are used to determine
2947     the nonzero state of mat */
2948   PetscObjectState diagstate = b->A->nonzerostate, offdiagstate = b->B->nonzerostate;
2949 
2950   PetscFunctionBegin;
2951   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2952   PetscCall(PetscLayoutSetUp(B->rmap));
2953   PetscCall(PetscLayoutSetUp(B->cmap));
2954   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2955   else {
2956 #if defined(PETSC_USE_CTABLE)
2957     PetscCall(PetscHMapIDestroy(&b->colmap));
2958 #else
2959     PetscCall(PetscFree(b->colmap));
2960 #endif
2961     PetscCall(PetscFree(b->garray));
2962     PetscCall(VecDestroy(&b->lvec));
2963   }
2964   PetscCall(VecScatterDestroy(&b->Mvctx));
2965 
2966   PetscCall(MatResetPreallocation(b->A));
2967   PetscCall(MatResetPreallocation(b->B));
2968   B->preallocated    = PETSC_TRUE;
2969   B->was_assembled   = PETSC_FALSE;
2970   B->assembled       = PETSC_FALSE;
2971   b->A->nonzerostate = ++diagstate, b->B->nonzerostate = ++offdiagstate;
2972   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2973   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2974   PetscFunctionReturn(PETSC_SUCCESS);
2975 }
2976 
2977 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2978 {
2979   Mat         mat;
2980   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2981 
2982   PetscFunctionBegin;
2983   *newmat = NULL;
2984   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2985   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2986   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2987   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2988   a = (Mat_MPIAIJ *)mat->data;
2989 
2990   mat->factortype = matin->factortype;
2991   mat->assembled  = matin->assembled;
2992   mat->insertmode = NOT_SET_VALUES;
2993 
2994   a->size         = oldmat->size;
2995   a->rank         = oldmat->rank;
2996   a->donotstash   = oldmat->donotstash;
2997   a->roworiented  = oldmat->roworiented;
2998   a->rowindices   = NULL;
2999   a->rowvalues    = NULL;
3000   a->getrowactive = PETSC_FALSE;
3001 
3002   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3003   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3004   if (matin->hash_active) {
3005     PetscCall(MatSetUp(mat));
3006   } else {
3007     mat->preallocated = matin->preallocated;
3008     if (oldmat->colmap) {
3009 #if defined(PETSC_USE_CTABLE)
3010       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3011 #else
3012       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3013       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3014 #endif
3015     } else a->colmap = NULL;
3016     if (oldmat->garray) {
3017       PetscInt len;
3018       len = oldmat->B->cmap->n;
3019       PetscCall(PetscMalloc1(len + 1, &a->garray));
3020       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3021     } else a->garray = NULL;
3022 
3023     /* MatDuplicate() may be called with a non-assembled matrix;
3024       in fact, MatDuplicate() only requires the matrix to be preallocated.
3025       This can happen inside DMCreateMatrix_Shell() */
3026     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3027     if (oldmat->Mvctx) {
3028       a->Mvctx = oldmat->Mvctx;
3029       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3030     }
3031     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3032     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3033   }
3034   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3035   *newmat = mat;
3036   PetscFunctionReturn(PETSC_SUCCESS);
3037 }
3038 
3039 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3040 {
3041   PetscBool isbinary, ishdf5;
3042 
3043   PetscFunctionBegin;
3044   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3045   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3046   /* force binary viewer to load .info file if it has not yet done so */
3047   PetscCall(PetscViewerSetUp(viewer));
3048   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3049   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3050   if (isbinary) {
3051     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3052   } else if (ishdf5) {
3053 #if defined(PETSC_HAVE_HDF5)
3054     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3055 #else
3056     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3057 #endif
3058   } else {
3059     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3060   }
3061   PetscFunctionReturn(PETSC_SUCCESS);
3062 }
3063 
3064 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3065 {
3066   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3067   PetscInt    *rowidxs, *colidxs;
3068   PetscScalar *matvals;
3069 
3070   PetscFunctionBegin;
3071   PetscCall(PetscViewerSetUp(viewer));
3072 
3073   /* read in matrix header */
3074   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3075   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3076   M  = header[1];
3077   N  = header[2];
3078   nz = header[3];
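  /* header layout on disk: {MAT_FILE_CLASSID, M, N, nz}; nz may be PETSC_INT_MAX when the
     total nonzero count is not recorded, in which case the consistency check against the
     summed row lengths below is skipped */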
3079   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3080   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3081   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3082 
3083   /* set block sizes from the viewer's .info file */
3084   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3085   /* set global sizes if not set already */
3086   if (mat->rmap->N < 0) mat->rmap->N = M;
3087   if (mat->cmap->N < 0) mat->cmap->N = N;
3088   PetscCall(PetscLayoutSetUp(mat->rmap));
3089   PetscCall(PetscLayoutSetUp(mat->cmap));
3090 
3091   /* check if the matrix sizes are correct */
3092   PetscCall(MatGetSize(mat, &rows, &cols));
3093   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3094 
3095   /* read in row lengths and build row indices */
3096   PetscCall(MatGetLocalSize(mat, &m, NULL));
3097   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3098   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3099   rowidxs[0] = 0;
3100   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
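  /* e.g. local row lengths {2, 0, 3} read into rowidxs[1..3] become the CSR row offsets
     rowidxs = {0, 2, 2, 5} after this prefix sum (a small worked example) */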
3101   if (nz != PETSC_INT_MAX) {
3102     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3103     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3104   }
3105 
3106   /* read in column indices and matrix values */
3107   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3108   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3109   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3110   /* store matrix indices and values */
3111   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3112   PetscCall(PetscFree(rowidxs));
3113   PetscCall(PetscFree2(colidxs, matvals));
3114   PetscFunctionReturn(PETSC_SUCCESS);
3115 }
3116 
3117 /* Not scalable because of ISAllGather() unless getting all columns. */
3118 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3119 {
3120   IS          iscol_local;
3121   PetscBool   isstride;
3122   PetscMPIInt gisstride = 0;
3123 
3124   PetscFunctionBegin;
3125   /* check if we are grabbing all columns */
3126   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3127 
3128   if (isstride) {
3129     PetscInt start, len, mstart, mlen;
3130     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3131     PetscCall(ISGetLocalSize(iscol, &len));
3132     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3133     if (mstart == start && mlen - mstart == len) gisstride = 1;
3134   }
3135 
3136   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3137   if (gisstride) {
3138     PetscInt N;
3139     PetscCall(MatGetSize(mat, NULL, &N));
3140     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3141     PetscCall(ISSetIdentity(iscol_local));
3142     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3143   } else {
3144     PetscInt cbs;
3145     PetscCall(ISGetBlockSize(iscol, &cbs));
3146     PetscCall(ISAllGather(iscol, &iscol_local));
3147     PetscCall(ISSetBlockSize(iscol_local, cbs));
3148   }
3149 
3150   *isseq = iscol_local;
3151   PetscFunctionReturn(PETSC_SUCCESS);
3152 }
3153 
3154 /*
3155  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local
3156  with the global size of iscol (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3157 
3158  Input Parameters:
3159 +   mat - matrix
3160 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3161            i.e., mat->rstart <= isrow[i] < mat->rend
3162 -   iscol - parallel column index set; its local indices are a subset of the local columns of `mat`,
3163            i.e., mat->cstart <= iscol[i] < mat->cend
3164 
3165  Output Parameters:
3166 +   isrow_d - sequential row index set for retrieving mat->A
3167 .   iscol_d - sequential column index set for retrieving mat->A
3168 .   iscol_o - sequential column index set for retrieving mat->B
3169 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3170  */
3171 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3172 {
3173   Vec             x, cmap;
3174   const PetscInt *is_idx;
3175   PetscScalar    *xarray, *cmaparray;
3176   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3177   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3178   Mat             B    = a->B;
3179   Vec             lvec = a->lvec, lcmap;
3180   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3181   MPI_Comm        comm;
3182   VecScatter      Mvctx = a->Mvctx;
3183 
3184   PetscFunctionBegin;
3185   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3186   PetscCall(ISGetLocalSize(iscol, &ncols));
3187 
3188   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3189   PetscCall(MatCreateVecs(mat, &x, NULL));
3190   PetscCall(VecSet(x, -1.0));
3191   PetscCall(VecDuplicate(x, &cmap));
3192   PetscCall(VecSet(cmap, -1.0));
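  /* Worked example (hypothetical numbers): with cstart = 4 and local iscol = {4, 6}, the loop
     below sets xarray[0] = 4 and xarray[2] = 6, leaving the other entries at -1; after the
     scatter in step (2), any lvec entry > -1 marks an off-process column kept by iscol */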
3193 
3194   /* Get start indices */
3195   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3196   isstart -= ncols;
3197   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3198 
3199   PetscCall(ISGetIndices(iscol, &is_idx));
3200   PetscCall(VecGetArray(x, &xarray));
3201   PetscCall(VecGetArray(cmap, &cmaparray));
3202   PetscCall(PetscMalloc1(ncols, &idx));
3203   for (i = 0; i < ncols; i++) {
3204     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3205     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3206     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3207   }
3208   PetscCall(VecRestoreArray(x, &xarray));
3209   PetscCall(VecRestoreArray(cmap, &cmaparray));
3210   PetscCall(ISRestoreIndices(iscol, &is_idx));
3211 
3212   /* Get iscol_d */
3213   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3214   PetscCall(ISGetBlockSize(iscol, &i));
3215   PetscCall(ISSetBlockSize(*iscol_d, i));
3216 
3217   /* Get isrow_d */
3218   PetscCall(ISGetLocalSize(isrow, &m));
3219   rstart = mat->rmap->rstart;
3220   PetscCall(PetscMalloc1(m, &idx));
3221   PetscCall(ISGetIndices(isrow, &is_idx));
3222   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3223   PetscCall(ISRestoreIndices(isrow, &is_idx));
3224 
3225   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3226   PetscCall(ISGetBlockSize(isrow, &i));
3227   PetscCall(ISSetBlockSize(*isrow_d, i));
3228 
3229   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3230   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3231   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3232 
3233   PetscCall(VecDuplicate(lvec, &lcmap));
3234 
3235   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3236   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3237 
3238   /* (3) create sequential iscol_o (a subset of iscol) and the garray column map */
3239   /* off-process column indices */
3240   count = 0;
3241   PetscCall(PetscMalloc1(Bn, &idx));
3242   PetscCall(PetscMalloc1(Bn, &cmap1));
3243 
3244   PetscCall(VecGetArray(lvec, &xarray));
3245   PetscCall(VecGetArray(lcmap, &cmaparray));
3246   for (i = 0; i < Bn; i++) {
3247     if (PetscRealPart(xarray[i]) > -1.0) {
3248       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3249       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3250       count++;
3251     }
3252   }
3253   PetscCall(VecRestoreArray(lvec, &xarray));
3254   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3255 
3256   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3257   /* cannot ensure iscol_o has same blocksize as iscol! */
3258 
3259   PetscCall(PetscFree(idx));
3260   *garray = cmap1;
3261 
3262   PetscCall(VecDestroy(&x));
3263   PetscCall(VecDestroy(&cmap));
3264   PetscCall(VecDestroy(&lcmap));
3265   PetscFunctionReturn(PETSC_SUCCESS);
3266 }
3267 
3268 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3269 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3270 {
3271   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3272   Mat         M = NULL;
3273   MPI_Comm    comm;
3274   IS          iscol_d, isrow_d, iscol_o;
3275   Mat         Asub = NULL, Bsub = NULL;
3276   PetscInt    n;
3277 
3278   PetscFunctionBegin;
3279   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3280 
3281   if (call == MAT_REUSE_MATRIX) {
3282     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3283     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3284     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3285 
3286     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3287     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3288 
3289     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3290     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3291 
3292     /* Update diagonal and off-diagonal portions of submat */
3293     asub = (Mat_MPIAIJ *)(*submat)->data;
3294     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3295     PetscCall(ISGetLocalSize(iscol_o, &n));
3296     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3297     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3298     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3299 
3300   } else { /* call == MAT_INITIAL_MATRIX) */
3301     const PetscInt *garray;
3302     PetscInt        BsubN;
3303 
3304     /* Create isrow_d, iscol_d, iscol_o and the garray column map */
3305     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3306 
3307     /* Create local submatrices Asub and Bsub */
3308     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3309     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3310 
3311     /* Create submatrix M */
3312     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3313 
3314     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3315     asub = (Mat_MPIAIJ *)M->data;
3316 
3317     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3318     n = asub->B->cmap->N;
3319     if (BsubN > n) {
3320       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3321       const PetscInt *idx;
3322       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3323       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3324 
3325       PetscCall(PetscMalloc1(n, &idx_new));
3326       j = 0;
3327       PetscCall(ISGetIndices(iscol_o, &idx));
3328       for (i = 0; i < n; i++) {
3329         if (j >= BsubN) break;
3330         while (subgarray[i] > garray[j]) j++;
3331 
3332         if (subgarray[i] == garray[j]) {
3333           idx_new[i] = idx[j++];
3334         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3335       }
3336       PetscCall(ISRestoreIndices(iscol_o, &idx));
3337 
3338       PetscCall(ISDestroy(&iscol_o));
3339       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3340 
3341     } else if (BsubN < n) {
3342       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3343     }
3344 
3345     PetscCall(PetscFree(garray));
3346     *submat = M;
3347 
3348     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3349     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3350     PetscCall(ISDestroy(&isrow_d));
3351 
3352     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3353     PetscCall(ISDestroy(&iscol_d));
3354 
3355     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3356     PetscCall(ISDestroy(&iscol_o));
3357   }
3358   PetscFunctionReturn(PETSC_SUCCESS);
3359 }
3360 
3361 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3362 {
3363   IS        iscol_local = NULL, isrow_d;
3364   PetscInt  csize;
3365   PetscInt  n, i, j, start, end;
3366   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3367   MPI_Comm  comm;
3368 
3369   PetscFunctionBegin;
3370   /* If isrow has the same processor distribution as mat,
3371      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3372   if (call == MAT_REUSE_MATRIX) {
3373     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3374     if (isrow_d) {
3375       sameRowDist  = PETSC_TRUE;
3376       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3377     } else {
3378       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3379       if (iscol_local) {
3380         sameRowDist  = PETSC_TRUE;
3381         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3382       }
3383     }
3384   } else {
3385     /* Check if isrow has same processor distribution as mat */
3386     sameDist[0] = PETSC_FALSE;
3387     PetscCall(ISGetLocalSize(isrow, &n));
3388     if (!n) {
3389       sameDist[0] = PETSC_TRUE;
3390     } else {
3391       PetscCall(ISGetMinMax(isrow, &i, &j));
3392       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3393       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3394     }
3395 
3396     /* Check if iscol has same processor distribution as mat */
3397     sameDist[1] = PETSC_FALSE;
3398     PetscCall(ISGetLocalSize(iscol, &n));
3399     if (!n) {
3400       sameDist[1] = PETSC_TRUE;
3401     } else {
3402       PetscCall(ISGetMinMax(iscol, &i, &j));
3403       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3404       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3405     }
3406 
3407     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3408     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3409     sameRowDist = tsameDist[0];
3410   }
3411 
3412   if (sameRowDist) {
3413     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3414       /* isrow and iscol have same processor distribution as mat */
3415       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3416       PetscFunctionReturn(PETSC_SUCCESS);
3417     } else { /* sameRowDist */
3418       /* isrow has same processor distribution as mat */
3419       if (call == MAT_INITIAL_MATRIX) {
3420         PetscBool sorted;
3421         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3422         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3423         PetscCall(ISGetSize(iscol, &i));
3424         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3425 
3426         PetscCall(ISSorted(iscol_local, &sorted));
3427         if (sorted) {
3428           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3429           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3430           PetscFunctionReturn(PETSC_SUCCESS);
3431         }
3432       } else { /* call == MAT_REUSE_MATRIX */
3433         IS iscol_sub;
3434         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3435         if (iscol_sub) {
3436           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3437           PetscFunctionReturn(PETSC_SUCCESS);
3438         }
3439       }
3440     }
3441   }
3442 
3443   /* General case: iscol -> iscol_local which has global size of iscol */
3444   if (call == MAT_REUSE_MATRIX) {
3445     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3446     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3447   } else {
3448     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3449   }
3450 
3451   PetscCall(ISGetLocalSize(iscol, &csize));
3452   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3453 
3454   if (call == MAT_INITIAL_MATRIX) {
3455     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3456     PetscCall(ISDestroy(&iscol_local));
3457   }
3458   PetscFunctionReturn(PETSC_SUCCESS);
3459 }
3460 
3461 /*@C
3462   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3463   and "off-diagonal" part of the matrix in CSR format.
3464 
3465   Collective
3466 
3467   Input Parameters:
3468 + comm   - MPI communicator
3469 . A      - "diagonal" portion of matrix
3470 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3471 - garray - global index of `B` columns
3472 
3473   Output Parameter:
3474 . mat - the matrix, with input `A` as its local diagonal matrix
3475 
3476   Level: advanced
3477 
3478   Notes:
3479   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3480 
3481   `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
3482 
3483 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
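  Example Usage:
  A minimal sketch, assuming `Asub` and `Bsub` are sequential AIJ matrices with the same number of
  local rows, and `garray` maps the columns of `Bsub` to global column indices.
.vb
  Mat M;

  PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
  /* Asub and Bsub now belong to M; the caller must not use or destroy them */
.ve
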
3484 @*/
3485 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3486 {
3487   Mat_MPIAIJ        *maij;
3488   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3489   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3490   const PetscScalar *oa;
3491   Mat                Bnew;
3492   PetscInt           m, n, N;
3493   MatType            mpi_mat_type;
3494 
3495   PetscFunctionBegin;
3496   PetscCall(MatCreate(comm, mat));
3497   PetscCall(MatGetSize(A, &m, &n));
3498   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3499   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3500   /* the check below is removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3501   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3502 
3503   /* Get global columns of mat */
3504   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3505 
3506   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3507   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3508   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3509   PetscCall(MatSetType(*mat, mpi_mat_type));
3510 
3511   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3512   maij = (Mat_MPIAIJ *)(*mat)->data;
3513 
3514   (*mat)->preallocated = PETSC_TRUE;
3515 
3516   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3517   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3518 
3519   /* Set A as diagonal portion of *mat */
3520   maij->A = A;
3521 
3522   nz = oi[m];
3523   for (i = 0; i < nz; i++) {
3524     col   = oj[i];
3525     oj[i] = garray[col];
3526   }
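  /* e.g. with garray = {0, 7, 9}, an entry of B stored with compressed column index 2 has just
     been rewritten to global column 9 (a small worked example) */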
3527 
3528   /* Set Bnew as off-diagonal portion of *mat */
3529   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3530   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3531   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3532   bnew        = (Mat_SeqAIJ *)Bnew->data;
3533   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3534   maij->B     = Bnew;
3535 
3536   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3537 
3538   b->free_a  = PETSC_FALSE;
3539   b->free_ij = PETSC_FALSE;
3540   PetscCall(MatDestroy(&B));
3541 
3542   bnew->free_a  = PETSC_TRUE;
3543   bnew->free_ij = PETSC_TRUE;
3544 
3545   /* condense columns of maij->B */
3546   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3547   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3548   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3549   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3550   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3551   PetscFunctionReturn(PETSC_SUCCESS);
3552 }
3553 
3554 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3555 
3556 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3557 {
3558   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3559   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3560   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3561   Mat             M, Msub, B = a->B;
3562   MatScalar      *aa;
3563   Mat_SeqAIJ     *aij;
3564   PetscInt       *garray = a->garray, *colsub, Ncols;
3565   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3566   IS              iscol_sub, iscmap;
3567   const PetscInt *is_idx, *cmap;
3568   PetscBool       allcolumns = PETSC_FALSE;
3569   MPI_Comm        comm;
3570 
3571   PetscFunctionBegin;
3572   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3573   if (call == MAT_REUSE_MATRIX) {
3574     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3575     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3576     PetscCall(ISGetLocalSize(iscol_sub, &count));
3577 
3578     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3579     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3580 
3581     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3582     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3583 
3584     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3585 
3586   } else { /* call == MAT_INITIAL_MATRIX */
3587     PetscBool flg;
3588 
3589     PetscCall(ISGetLocalSize(iscol, &n));
3590     PetscCall(ISGetSize(iscol, &Ncols));
3591 
3592     /* (1) iscol -> nonscalable iscol_local */
3593     /* Check for the special case in which each process gets all matrix columns */
3594     PetscCall(ISIdentity(iscol_local, &flg));
3595     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3596     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3597     if (allcolumns) {
3598       iscol_sub = iscol_local;
3599       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3600       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3601 
3602     } else {
3603       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
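      /* Worked example (hypothetical numbers): with cstart = 2, cend = 4, garray = {0, 5} and
         sorted iscol_local = {0, 2, 3, 5}, all four indices survive: 2 and 3 fall inside the
         diagonal block, while 0 and 5 are matched against garray by the k-pointer walk */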
3604       PetscInt *idx, *cmap1, k;
3605       PetscCall(PetscMalloc1(Ncols, &idx));
3606       PetscCall(PetscMalloc1(Ncols, &cmap1));
3607       PetscCall(ISGetIndices(iscol_local, &is_idx));
3608       count = 0;
3609       k     = 0;
3610       for (i = 0; i < Ncols; i++) {
3611         j = is_idx[i];
3612         if (j >= cstart && j < cend) {
3613           /* diagonal part of mat */
3614           idx[count]     = j;
3615           cmap1[count++] = i; /* column index in submat */
3616         } else if (Bn) {
3617           /* off-diagonal part of mat */
3618           if (j == garray[k]) {
3619             idx[count]     = j;
3620             cmap1[count++] = i; /* column index in submat */
3621           } else if (j > garray[k]) {
3622             while (j > garray[k] && k < Bn - 1) k++;
3623             if (j == garray[k]) {
3624               idx[count]     = j;
3625               cmap1[count++] = i; /* column index in submat */
3626             }
3627           }
3628         }
3629       }
3630       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3631 
3632       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3633       PetscCall(ISGetBlockSize(iscol, &cbs));
3634       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3635 
3636       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3637     }
3638 
3639     /* (3) Create sequential Msub */
3640     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3641   }
3642 
3643   PetscCall(ISGetLocalSize(iscol_sub, &count));
3644   aij = (Mat_SeqAIJ *)Msub->data;
3645   ii  = aij->i;
3646   PetscCall(ISGetIndices(iscmap, &cmap));
3647 
3648   /*
3649       m - number of local rows
3650       Ncols - number of columns (same on all processors)
3651       rstart - first row in new global matrix generated
3652   */
3653   PetscCall(MatGetSize(Msub, &m, NULL));
3654 
3655   if (call == MAT_INITIAL_MATRIX) {
3656     /* (4) Create parallel newmat */
3657     PetscMPIInt rank, size;
3658     PetscInt    csize;
3659 
3660     PetscCallMPI(MPI_Comm_size(comm, &size));
3661     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3662 
3663     /*
3664         Determine the number of non-zeros in the diagonal and off-diagonal
3665         portions of the matrix in order to do correct preallocation
3666     */
3667 
3668     /* first get start and end of "diagonal" columns */
3669     PetscCall(ISGetLocalSize(iscol, &csize));
3670     if (csize == PETSC_DECIDE) {
3671       PetscCall(ISGetSize(isrow, &mglobal));
3672       if (mglobal == Ncols) { /* square matrix */
3673         nlocal = m;
3674       } else {
3675         nlocal = Ncols / size + ((Ncols % size) > rank);
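        /* e.g. Ncols = 10 on size = 4 ranks yields nlocal = 3, 3, 2, 2 for ranks 0..3 */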
3676       }
3677     } else {
3678       nlocal = csize;
3679     }
3680     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3681     rstart = rend - nlocal;
3682     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3683 
3684     /* next, compute all the lengths */
3685     jj = aij->j;
3686     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3687     olens = dlens + m;
3688     for (i = 0; i < m; i++) {
3689       jend = ii[i + 1] - ii[i];
3690       olen = 0;
3691       dlen = 0;
3692       for (j = 0; j < jend; j++) {
3693         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3694         else dlen++;
3695         jj++;
3696       }
3697       olens[i] = olen;
3698       dlens[i] = dlen;
3699     }
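    /* e.g. with [rstart, rend) = [2, 4), a row whose submat columns are {0, 2, 5} gets
       dlens[i] = 1 and olens[i] = 2 (a small worked example) */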
3700 
3701     PetscCall(ISGetBlockSize(isrow, &bs));
3702     PetscCall(ISGetBlockSize(iscol, &cbs));
3703 
3704     PetscCall(MatCreate(comm, &M));
3705     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3706     PetscCall(MatSetBlockSizes(M, bs, cbs));
3707     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3708     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3709     PetscCall(PetscFree(dlens));
3710 
3711   } else { /* call == MAT_REUSE_MATRIX */
3712     M = *newmat;
3713     PetscCall(MatGetLocalSize(M, &i, NULL));
3714     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3715     PetscCall(MatZeroEntries(M));
3716     /*
3717          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3718        rather than the slower MatSetValues().
3719     */
3720     M->was_assembled = PETSC_TRUE;
3721     M->assembled     = PETSC_FALSE;
3722   }
3723 
3724   /* (5) Set values of Msub to *newmat */
3725   PetscCall(PetscMalloc1(count, &colsub));
3726   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3727 
3728   jj = aij->j;
3729   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3730   for (i = 0; i < m; i++) {
3731     row = rstart + i;
3732     nz  = ii[i + 1] - ii[i];
3733     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3734     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3735     jj += nz;
3736     aa += nz;
3737   }
3738   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3739   PetscCall(ISRestoreIndices(iscmap, &cmap));
3740 
3741   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3742   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3743 
3744   PetscCall(PetscFree(colsub));
3745 
3746   /* save Msub, iscol_sub and iscmap used in processor for next request */
3747   if (call == MAT_INITIAL_MATRIX) {
3748     *newmat = M;
3749     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3750     PetscCall(MatDestroy(&Msub));
3751 
3752     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3753     PetscCall(ISDestroy(&iscol_sub));
3754 
3755     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3756     PetscCall(ISDestroy(&iscmap));
3757 
3758     if (iscol_local) {
3759       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3760       PetscCall(ISDestroy(&iscol_local));
3761     }
3762   }
3763   PetscFunctionReturn(PETSC_SUCCESS);
3764 }
3765 
3766 /*
3767     Not great since it makes two copies of the submatrix: first a SeqAIJ
3768   locally, and then the end result by concatenating the local matrices.
3769   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3770 
3771   This requires a sequential iscol with all indices.
3772 */
3773 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3774 {
3775   PetscMPIInt rank, size;
3776   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3777   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3778   Mat         M, Mreuse;
3779   MatScalar  *aa, *vwork;
3780   MPI_Comm    comm;
3781   Mat_SeqAIJ *aij;
3782   PetscBool   colflag, allcolumns = PETSC_FALSE;
3783 
3784   PetscFunctionBegin;
3785   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3786   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3787   PetscCallMPI(MPI_Comm_size(comm, &size));
3788 
3789   /* Check for the special case in which each process gets all matrix columns */
3790   PetscCall(ISIdentity(iscol, &colflag));
3791   PetscCall(ISGetLocalSize(iscol, &n));
3792   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3793   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3794 
3795   if (call == MAT_REUSE_MATRIX) {
3796     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3797     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3798     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3799   } else {
3800     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3801   }
3802 
3803   /*
3804       m - number of local rows
3805       n - number of columns (same on all processors)
3806       rstart - first row in new global matrix generated
3807   */
3808   PetscCall(MatGetSize(Mreuse, &m, &n));
3809   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3810   if (call == MAT_INITIAL_MATRIX) {
3811     aij = (Mat_SeqAIJ *)Mreuse->data;
3812     ii  = aij->i;
3813     jj  = aij->j;
3814 
3815     /*
3816         Determine the number of non-zeros in the diagonal and off-diagonal
3817         portions of the matrix in order to do correct preallocation
3818     */
3819 
3820     /* first get start and end of "diagonal" columns */
3821     if (csize == PETSC_DECIDE) {
3822       PetscCall(ISGetSize(isrow, &mglobal));
3823       if (mglobal == n) { /* square matrix */
3824         nlocal = m;
3825       } else {
3826         nlocal = n / size + ((n % size) > rank);
3827       }
3828     } else {
3829       nlocal = csize;
3830     }
3831     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3832     rstart = rend - nlocal;
3833     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3834 
3835     /* next, compute all the lengths */
3836     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3837     olens = dlens + m;
3838     for (i = 0; i < m; i++) {
3839       jend = ii[i + 1] - ii[i];
3840       olen = 0;
3841       dlen = 0;
3842       for (j = 0; j < jend; j++) {
3843         if (*jj < rstart || *jj >= rend) olen++;
3844         else dlen++;
3845         jj++;
3846       }
3847       olens[i] = olen;
3848       dlens[i] = dlen;
3849     }
3850     PetscCall(MatCreate(comm, &M));
3851     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3852     PetscCall(MatSetBlockSizes(M, bs, cbs));
3853     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3854     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3855     PetscCall(PetscFree(dlens));
3856   } else {
3857     PetscInt ml, nl;
3858 
3859     M = *newmat;
3860     PetscCall(MatGetLocalSize(M, &ml, &nl));
3861     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3862     PetscCall(MatZeroEntries(M));
3863     /*
3864          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3865        rather than the slower MatSetValues().
3866     */
3867     M->was_assembled = PETSC_TRUE;
3868     M->assembled     = PETSC_FALSE;
3869   }
3870   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3871   aij = (Mat_SeqAIJ *)Mreuse->data;
3872   ii  = aij->i;
3873   jj  = aij->j;
3874 
3875   /* trigger copy to CPU if needed */
3876   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3877   for (i = 0; i < m; i++) {
3878     row   = rstart + i;
3879     nz    = ii[i + 1] - ii[i];
3880     cwork = jj;
3881     jj    = PetscSafePointerPlusOffset(jj, nz);
3882     vwork = aa;
3883     aa    = PetscSafePointerPlusOffset(aa, nz);
3884     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3885   }
3886   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3887 
3888   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3889   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3890   *newmat = M;
3891 
3892   /* save the submatrix on this process for a later MAT_REUSE_MATRIX request */
3893   if (call == MAT_INITIAL_MATRIX) {
3894     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3895     PetscCall(MatDestroy(&Mreuse));
3896   }
3897   PetscFunctionReturn(PETSC_SUCCESS);
3898 }
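
/*
   Editorial sketch (not part of the PETSc source): the routine above is normally
   reached through the public MatCreateSubMatrix() interface. The helper below is
   hypothetical; it keeps all locally owned rows and columns, so the allcolumns
   special case checked above applies.
*/
static PetscErrorCode ExampleExtractSubMatrix(Mat A, Mat *sub)
{
  IS       isrow, iscol;
  PetscInt rstart, rend, cstart, cend;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
  /* each rank lists the rows it wants and the columns of its new diagonal part */
  PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), rend - rstart, rstart, 1, &isrow));
  PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), cend - cstart, cstart, 1, &iscol));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, sub));
  /* a later call with the same index sets could pass MAT_REUSE_MATRIX instead */
  PetscCall(ISDestroy(&isrow));
  PetscCall(ISDestroy(&iscol));
  PetscFunctionReturn(PETSC_SUCCESS);
}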
3899 
3900 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3901 {
3902   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3903   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3904   const PetscInt *JJ;
3905   PetscBool       nooffprocentries;
3906   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3907 
3908   PetscFunctionBegin;
3909   PetscCall(PetscLayoutSetUp(B->rmap));
3910   PetscCall(PetscLayoutSetUp(B->cmap));
3911   m       = B->rmap->n;
3912   cstart  = B->cmap->rstart;
3913   cend    = B->cmap->rend;
3914   rstart  = B->rmap->rstart;
3915   irstart = Ii[0];
3916 
3917   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3918 
3919   if (PetscDefined(USE_DEBUG)) {
3920     for (i = 0; i < m; i++) {
3921       nnz = Ii[i + 1] - Ii[i];
3922       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3923       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3924       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3925       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3926     }
3927   }
3928 
3929   for (i = 0; i < m; i++) {
3930     nnz     = Ii[i + 1] - Ii[i];
3931     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3932     nnz_max = PetscMax(nnz_max, nnz);
3933     d       = 0;
3934     for (j = 0; j < nnz; j++) {
3935       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3936     }
3937     d_nnz[i] = d;
3938     o_nnz[i] = nnz - d;
3939   }
3940   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3941   PetscCall(PetscFree2(d_nnz, o_nnz));
3942 
3943   for (i = 0; i < m; i++) {
3944     ii = i + rstart;
3945     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3946   }
3947   nooffprocentries    = B->nooffprocentries;
3948   B->nooffprocentries = PETSC_TRUE;
3949   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3950   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3951   B->nooffprocentries = nooffprocentries;
3952 
3953   /* count number of entries below block diagonal */
3954   PetscCall(PetscFree(Aij->ld));
3955   PetscCall(PetscCalloc1(m, &ld));
3956   Aij->ld = ld;
3957   for (i = 0; i < m; i++) {
3958     nnz = Ii[i + 1] - Ii[i];
3959     j   = 0;
3960     while (j < nnz && J[j] < cstart) j++;
3961     ld[i] = j;
3962     if (J) J += nnz;
3963   }
3964 
3965   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3966   PetscFunctionReturn(PETSC_SUCCESS);
3967 }
3968 
3969 /*@
3970   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3971   (the default parallel PETSc format).
3972 
3973   Collective
3974 
3975   Input Parameters:
3976 + B - the matrix
3977 . i - the indices into `j` for the start of each local row (indices start with zero)
3978 . j - the column indices for each local row (indices start with zero)
3979 - v - optional values in the matrix
3980 
3981   Level: developer
3982 
3983   Notes:
3984   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3985   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3986   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3987 
3988   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3989 
3990   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3991 
3992   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3993 
3994   If you will not use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted; if you will, they must be.
3996 
3997   The format used for the sparse matrix input is equivalent to a
3998   row-major ordering, i.e., for the following matrix, the expected input data is
3999   as shown
4000 .vb
4001         1 0 0
4002         2 0 3     P0
4003        -------
4004         4 5 6     P1
4005 
4006      Process0 [P0] rows_owned=[0,1]
4007         i =  {0,1,3}  [size = nrow+1  = 2+1]
4008         j =  {0,0,2}  [size = 3]
4009         v =  {1,2,3}  [size = 3]
4010 
4011      Process1 [P1] rows_owned=[2]
4012         i =  {0,3}    [size = nrow+1  = 1+1]
4013         j =  {0,1,2}  [size = 3]
4014         v =  {4,5,6}  [size = 3]
4015 .ve
4016 
4017 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4018           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4019 @*/
4020 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4021 {
4022   PetscFunctionBegin;
4023   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4024   PetscFunctionReturn(PETSC_SUCCESS);
4025 }
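
/*
   Editorial sketch (not part of the PETSc source): assembling the 3x3 manual-page
   example above on exactly two MPI ranks. The helper name and the literal arrays
   are illustration only.
*/
static PetscErrorCode ExamplePreallocationCSR(MPI_Comm comm, Mat *A)
{
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2}; /* rank 0 owns rows 0-1 */
  const PetscScalar v0[] = {1, 2, 3};
  const PetscInt    i1[] = {0, 3}, j1[] = {0, 1, 2}; /* rank 1 owns row 2 */
  const PetscScalar v1[] = {4, 5, 6};

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, rank == 0 ? 2 : 1, PETSC_DECIDE, 3, 3));
  PetscCall(MatSetType(*A, MATMPIAIJ));
  /* preallocates, inserts the values, and assembles in one call */
  PetscCall(MatMPIAIJSetPreallocationCSR(*A, rank == 0 ? i0 : i1, rank == 0 ? j0 : j1, rank == 0 ? v0 : v1));
  PetscFunctionReturn(PETSC_SUCCESS);
}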
4026 
4027 /*@
4028   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4029   (the default parallel PETSc format).  For good matrix assembly performance
4030   the user should preallocate the matrix storage by setting the parameters
4031   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4032 
4033   Collective
4034 
4035   Input Parameters:
4036 + B     - the matrix
4037 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4038            (same value is used for all local rows)
4039 . d_nnz - array containing the number of nonzeros in the various rows of the
4040            DIAGONAL portion of the local submatrix (possibly different for each row)
4041            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4042            The size of this array is equal to the number of local rows, i.e., `m`.
4043            For matrices that will be factored, you must leave room for (and set)
4044            the diagonal entry even if it is zero.
4045 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4046            submatrix (same value is used for all local rows).
4047 - o_nnz - array containing the number of nonzeros in the various rows of the
4048            OFF-DIAGONAL portion of the local submatrix (possibly different for
4049            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4050            structure. The size of this array is equal to the number
4051            of local rows, i.e., `m`.
4052 
4053   Example Usage:
4054   Consider the following 8x8 matrix with 34 non-zero values that is
4055   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4056   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4057   as follows
4058 
4059 .vb
4060             1  2  0  |  0  3  0  |  0  4
4061     Proc0   0  5  6  |  7  0  0  |  8  0
4062             9  0 10  | 11  0  0  | 12  0
4063     -------------------------------------
4064            13  0 14  | 15 16 17  |  0  0
4065     Proc1   0 18  0  | 19 20 21  |  0  0
4066             0  0  0  | 22 23  0  | 24  0
4067     -------------------------------------
4068     Proc2  25 26 27  |  0  0 28  | 29  0
4069            30  0  0  | 31 32 33  |  0 34
4070 .ve
4071 
4072   This can be represented as a collection of submatrices as
4073 .vb
4074       A B C
4075       D E F
4076       G H I
4077 .ve
4078 
4079   Where the submatrices A,B,C are owned by proc0, D,E,F are
4080   owned by proc1, G,H,I are owned by proc2.
4081 
4082   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4083   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4084   The 'M','N' parameters are 8,8, and have the same values on all procs.
4085 
4086   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4087   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4088   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4089   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4090   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4091   matrix, and [DF] as another `MATSEQAIJ` matrix.
4092 
4093   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4094   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4095   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4096   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4097   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4098   In this case, the values of `d_nz`, `o_nz` are
4099 .vb
4100      proc0  dnz = 2, o_nz = 2
4101      proc1  dnz = 3, o_nz = 2
4102      proc2  dnz = 1, o_nz = 4
4103 .ve
4104   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4105   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4106   for proc2. That is, we are using 12+15+10=37 storage locations to store
4107   34 values.
4108 
4109   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4110   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4111   In the above case the values for `d_nnz`, `o_nnz` are
4112 .vb
4113      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4114      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4115      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4116 .ve
4117   Here the space allocated is the sum of all the above values, i.e., 34, and
4118   hence the preallocation is perfect.
4119 
4120   Level: intermediate
4121 
4122   Notes:
4123   If the *_nnz parameter is given then the *_nz parameter is ignored
4124 
4125   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4126   storage.  The stored row and column indices begin with zero.
4127   See [Sparse Matrices](sec_matsparse) for details.
4128 
4129   The parallel matrix is partitioned such that the first m0 rows belong to
4130   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4131   to process 2, etc., where m0, m1, m2, ... are the input parameter 'm'.
4132 
4133   The DIAGONAL portion of the local submatrix of a processor can be defined
4134   as the submatrix obtained by extracting the part corresponding to
4135   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4136   first row that belongs to the processor, r2 is the last row belonging to
4137   this processor, and c1-c2 is the range of indices of the local part of a
4138   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4139   common case of a square matrix, the row and column ranges are the same and
4140   the DIAGONAL part is also square. The remaining portion of the local
4141   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4142 
4143   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4144 
4145   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4146   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4147   You can also run with the option `-info` and look for messages with the string
4148   malloc in them to see if additional memory allocation was needed.
4149 
4150 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4151           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4152 @*/
4153 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4154 {
4155   PetscFunctionBegin;
4156   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4157   PetscValidType(B, 1);
4158   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4159   PetscFunctionReturn(PETSC_SUCCESS);
4160 }
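
/*
   Editorial sketch (not part of the PETSc source): the per-row preallocation of
   the 8x8, 3-process example above, written out for proc1 only; proc0 and proc2
   would pass their own local sizes and d_nnz/o_nnz arrays.
*/
static PetscErrorCode ExamplePreallocateProc1(MPI_Comm comm, Mat *A)
{
  const PetscInt d_nnz[] = {3, 3, 2}; /* nonzeros per local row of the DIAGONAL block [E] */
  const PetscInt o_nnz[] = {2, 1, 1}; /* nonzeros per local row of the OFF-DIAGONAL block [DF] */

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, 3, 3, 8, 8)); /* m = n = 3 on this rank, M = N = 8 */
  PetscCall(MatSetType(*A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(*A, 0, d_nnz, 0, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}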
4161 
4162 /*@
4163   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4164   rows in standard CSR format.
4165 
4166   Collective
4167 
4168   Input Parameters:
4169 + comm - MPI communicator
4170 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4171 . n    - This value should be the same as the local size used in creating the
4172          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
4173          calculated if `N` is given). For square matrices `n` is almost always `m`.
4174 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4175 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4176 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4177 . j    - global column indices
4178 - a    - optional matrix values
4179 
4180   Output Parameter:
4181 . mat - the matrix
4182 
4183   Level: intermediate
4184 
4185   Notes:
4186   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4187   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4188   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4189 
4190   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4191 
4192   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4193 
4194   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4195   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4196 
4197   The format which is used for the sparse matrix input, is equivalent to a
4198   row-major ordering, i.e., for the following matrix, the input data expected is
4199   as shown
4200 .vb
4201         1 0 0
4202         2 0 3     P0
4203        -------
4204         4 5 6     P1
4205 
4206      Process0 [P0] rows_owned=[0,1]
4207         i =  {0,1,3}  [size = nrow+1  = 2+1]
4208         j =  {0,0,2}  [size = 3]
4209         v =  {1,2,3}  [size = 3]
4210 
4211      Process1 [P1] rows_owned=[2]
4212         i =  {0,3}    [size = nrow+1  = 1+1]
4213         j =  {0,1,2}  [size = 3]
4214         v =  {4,5,6}  [size = 3]
4215 .ve
4216 
4217 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4218           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4219 @*/
4220 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4221 {
4222   PetscFunctionBegin;
4223   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4224   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4225   PetscCall(MatCreate(comm, mat));
4226   PetscCall(MatSetSizes(*mat, m, n, M, N));
4227   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4228   PetscCall(MatSetType(*mat, MATMPIAIJ));
4229   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4230   PetscFunctionReturn(PETSC_SUCCESS);
4231 }
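
/*
   Editorial sketch (not part of the PETSc source): the convenience one-call form
   of the two-rank example shown for MatMPIAIJSetPreallocationCSR() above.
*/
static PetscErrorCode ExampleCreateWithArrays(MPI_Comm comm, Mat *A)
{
  PetscMPIInt       rank;
  const PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2}, i1[] = {0, 3}, j1[] = {0, 1, 2};
  const PetscScalar a0[] = {1, 2, 3}, a1[] = {4, 5, 6};

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (rank == 0) PetscCall(MatCreateMPIAIJWithArrays(comm, 2, PETSC_DECIDE, 3, 3, i0, j0, a0, A));
  else PetscCall(MatCreateMPIAIJWithArrays(comm, 1, PETSC_DECIDE, 3, 3, i1, j1, a1, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}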
4232 
4233 /*@
4234   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
4235   rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4236   to `MatCreateMPIAIJWithArrays()`
4237 
4238   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4239 
4240   Collective
4241 
4242   Input Parameters:
4243 + mat - the matrix
4244 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4245 . n   - This value should be the same as the local size used in creating the
4246        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4247        calculated if N is given). For square matrices n is almost always m.
4248 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4249 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4250 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4251 . J   - column indices
4252 - v   - matrix values
4253 
4254   Level: deprecated
4255 
4256 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4257           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4258 @*/
4259 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4260 {
4261   PetscInt        nnz, i;
4262   PetscBool       nooffprocentries;
4263   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4264   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4265   PetscScalar    *ad, *ao;
4266   PetscInt        ldi, Iii, md;
4267   const PetscInt *Adi = Ad->i;
4268   PetscInt       *ld  = Aij->ld;
4269 
4270   PetscFunctionBegin;
4271   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4272   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4273   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4274   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4275 
4276   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4277   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4278 
4279   for (i = 0; i < m; i++) {
4280     if (PetscDefined(USE_DEBUG)) {
4281       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4282         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4283         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4284       }
4285     }
4286     nnz = Ii[i + 1] - Ii[i];
4287     Iii = Ii[i];
4288     ldi = ld[i];
4289     md  = Adi[i + 1] - Adi[i];
4290     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4291     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4292     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4293     ad += md;
4294     ao += nnz - md;
4295   }
4296   nooffprocentries      = mat->nooffprocentries;
4297   mat->nooffprocentries = PETSC_TRUE;
4298   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4299   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4300   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4301   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4302   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4303   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4304   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4305   mat->nooffprocentries = nooffprocentries;
4306   PetscFunctionReturn(PETSC_SUCCESS);
4307 }
4308 
4309 /*@
4310   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4311 
4312   Collective
4313 
4314   Input Parameters:
4315 + mat - the matrix
4316 - v   - matrix values, stored by row
4317 
4318   Level: intermediate
4319 
4320   Notes:
4321   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4322 
4323   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4324 
4325 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4326           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4327 @*/
4328 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4329 {
4330   PetscInt        nnz, i, m;
4331   PetscBool       nooffprocentries;
4332   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4333   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4334   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4335   PetscScalar    *ad, *ao;
4336   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4337   PetscInt        ldi, Iii, md;
4338   PetscInt       *ld = Aij->ld;
4339 
4340   PetscFunctionBegin;
4341   m = mat->rmap->n;
4342 
4343   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4344   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4345   Iii = 0;
4346   for (i = 0; i < m; i++) {
4347     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4348     ldi = ld[i];
4349     md  = Adi[i + 1] - Adi[i];
4350     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4351     ad += md;
4352     if (ao) {
4353       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4354       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4355       ao += nnz - md;
4356     }
4357     Iii += nnz;
4358   }
4359   nooffprocentries      = mat->nooffprocentries;
4360   mat->nooffprocentries = PETSC_TRUE;
4361   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4362   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4363   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4364   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4365   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4366   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4367   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4368   mat->nooffprocentries = nooffprocentries;
4369   PetscFunctionReturn(PETSC_SUCCESS);
4370 }
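
/*
   Editorial sketch (not part of the PETSc source): refreshing the numerical values
   of the two-rank example matrix built above. The replacement arrays are hypothetical
   and must have the same length and (sorted-column) ordering as the original v.
*/
static PetscErrorCode ExampleUpdateValues(Mat A)
{
  PetscMPIInt       rank;
  const PetscScalar vnew0[] = {10, 20, 30}, vnew1[] = {40, 50, 60};

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
  PetscCall(MatUpdateMPIAIJWithArray(A, rank == 0 ? vnew0 : vnew1));
  PetscFunctionReturn(PETSC_SUCCESS);
}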
4371 
4372 /*@
4373   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4374   (the default parallel PETSc format).  For good matrix assembly performance
4375   the user should preallocate the matrix storage by setting the parameters
4376   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4377 
4378   Collective
4379 
4380   Input Parameters:
4381 + comm  - MPI communicator
4382 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4383           This value should be the same as the local size used in creating the
4384           y vector for the matrix-vector product y = Ax.
4385 . n     - This value should be the same as the local size used in creating the
4386           x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4387           calculated if N is given). For square matrices n is almost always m.
4388 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4389 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4390 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4391           (same value is used for all local rows)
4392 . d_nnz - array containing the number of nonzeros in the various rows of the
4393           DIAGONAL portion of the local submatrix (possibly different for each row)
4394           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4395           The size of this array is equal to the number of local rows, i.e 'm'.
4396 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4397           submatrix (same value is used for all local rows).
4398 - o_nnz - array containing the number of nonzeros in the various rows of the
4399           OFF-DIAGONAL portion of the local submatrix (possibly different for
4400           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4401           structure. The size of this array is equal to the number
4402           of local rows, i.e 'm'.
4403 
4404   Output Parameter:
4405 . A - the matrix
4406 
4407   Options Database Keys:
4408 + -mat_no_inode                     - Do not use inodes
4409 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4410 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4411                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4412                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4413 
4414   Level: intermediate
4415 
4416   Notes:
4417   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4418   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4419   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4420 
4421   If the *_nnz parameter is given then the *_nz parameter is ignored
4422 
4423   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4424   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4425   storage requirements for this matrix.
4426 
4427   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4428   processor then it must be used on all processors that share the object for
4429   that argument.
4430 
4431   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4432   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4433 
4434   The user MUST specify either the local or global matrix dimensions
4435   (possibly both).
4436 
4437   The parallel matrix is partitioned across processors such that the
4438   first `m0` rows belong to process 0, the next `m1` rows belong to
4439   process 1, the next `m2` rows belong to process 2, etc., where
4440   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4441   values corresponding to an [m x N] submatrix.
4442 
4443   The columns are logically partitioned with the first n0 columns belonging
4444   to the 0th partition, the next n1 columns belonging to the next
4445   partition, etc., where n0, n1, n2, ... are the input parameter 'n'.
4446 
4447   The DIAGONAL portion of the local submatrix on any given processor
4448   is the submatrix corresponding to the rows and columns m,n owned by
4449   the given processor, i.e., the diagonal matrix on
4450   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4451   etc. The remaining portion of the local submatrix [m x (N-n)]
4452   constitutes the OFF-DIAGONAL portion. The example below better
4453   illustrates this concept. The two matrices, the DIAGONAL portion and
4454   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4455 
4456   For a square global matrix we define each processor's diagonal portion
4457   to be its local rows and the corresponding columns (a square submatrix);
4458   each processor's off-diagonal portion encompasses the remainder of the
4459   local matrix (a rectangular submatrix).
4460 
4461   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4462 
4463   When calling this routine with a single process communicator, a matrix of
4464   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4465   type of communicator, use the construction mechanism
4466 .vb
4467   MatCreate(..., &A);
4468   MatSetType(A, MATMPIAIJ);
4469   MatSetSizes(A, m, n, M, N);
4470   MatMPIAIJSetPreallocation(A, ...);
4471 .ve
4472 
4473   By default, this format uses inodes (identical nodes) when possible.
4474   We search for consecutive rows with the same nonzero structure, thereby
4475   reusing matrix information to achieve increased efficiency.
4476 
4477   Example Usage:
4478   Consider the following 8x8 matrix with 34 non-zero values that is
4479   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4480   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4481   as follows
4482 
4483 .vb
4484             1  2  0  |  0  3  0  |  0  4
4485     Proc0   0  5  6  |  7  0  0  |  8  0
4486             9  0 10  | 11  0  0  | 12  0
4487     -------------------------------------
4488            13  0 14  | 15 16 17  |  0  0
4489     Proc1   0 18  0  | 19 20 21  |  0  0
4490             0  0  0  | 22 23  0  | 24  0
4491     -------------------------------------
4492     Proc2  25 26 27  |  0  0 28  | 29  0
4493            30  0  0  | 31 32 33  |  0 34
4494 .ve
4495 
4496   This can be represented as a collection of submatrices as
4497 
4498 .vb
4499       A B C
4500       D E F
4501       G H I
4502 .ve
4503 
4504   Where the submatrices A,B,C are owned by proc0, D,E,F are
4505   owned by proc1, G,H,I are owned by proc2.
4506 
4507   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4508   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4509   The 'M','N' parameters are 8,8, and have the same values on all procs.
4510 
4511   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4512   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4513   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4514   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4515   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4516   matrix, and [DF] as another `MATSEQAIJ` matrix.
4517 
4518   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4519   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4520   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4521   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4522   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4523   In this case, the values of `d_nz`,`o_nz` are
4524 .vb
4525      proc0  dnz = 2, o_nz = 2
4526      proc1  dnz = 3, o_nz = 2
4527      proc2  dnz = 1, o_nz = 4
4528 .ve
4529   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4530   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4531   for proc2. That is, we are using 12+15+10=37 storage locations to store
4532   34 values.
4533 
4534   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4535   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4536   In the above case the values for `d_nnz`, `o_nnz` are
4537 .vb
4538      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4539      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4540      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4541 .ve
4542   Here the space allocated is the sum of all the above values, i.e., 34, and
4543   hence the preallocation is perfect.
4544 
4545 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4546           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4547           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4548 @*/
4549 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4550 {
4551   PetscMPIInt size;
4552 
4553   PetscFunctionBegin;
4554   PetscCall(MatCreate(comm, A));
4555   PetscCall(MatSetSizes(*A, m, n, M, N));
4556   PetscCallMPI(MPI_Comm_size(comm, &size));
4557   if (size > 1) {
4558     PetscCall(MatSetType(*A, MATMPIAIJ));
4559     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4560   } else {
4561     PetscCall(MatSetType(*A, MATSEQAIJ));
4562     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4563   }
4564   PetscFunctionReturn(PETSC_SUCCESS);
4565 }
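
/*
   Editorial sketch (not part of the PETSc source): the single-call form of the
   preallocation example above, again from proc1's point of view on a 3-process
   communicator; the other ranks pass their own local sizes and arrays. On a
   single-process communicator this returns a MATSEQAIJ matrix, as noted above.
*/
static PetscErrorCode ExampleCreateAIJProc1(MPI_Comm comm, Mat *A)
{
  const PetscInt d_nnz[] = {3, 3, 2}, o_nnz[] = {2, 1, 1};

  PetscFunctionBegin;
  PetscCall(MatCreateAIJ(comm, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, A));
  PetscFunctionReturn(PETSC_SUCCESS);
}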
4566 
4567 /*MC
4568     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4569 
4570     Synopsis:
4571     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4572 
4573     Not Collective
4574 
4575     Input Parameter:
4576 .   A - the `MATMPIAIJ` matrix
4577 
4578     Output Parameters:
4579 +   Ad - the diagonal portion of the matrix
4580 .   Ao - the off-diagonal portion of the matrix
4581 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4582 -   ierr - error code
4583 
4584      Level: advanced
4585 
4586     Note:
4587     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4588 
4589 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4590 M*/
4591 
4592 /*MC
4593     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4594 
4595     Synopsis:
4596     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4597 
4598     Not Collective
4599 
4600     Input Parameters:
4601 +   A - the `MATMPIAIJ` matrix
4602 .   Ad - the diagonal portion of the matrix
4603 .   Ao - the off-diagonal portion of the matrix
4604 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4605 -   ierr - error code
4606 
4607      Level: advanced
4608 
4609 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4610 M*/
4611 
4612 /*@C
4613   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4614 
4615   Not Collective
4616 
4617   Input Parameter:
4618 . A - The `MATMPIAIJ` matrix
4619 
4620   Output Parameters:
4621 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4622 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4623 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4624 
4625   Level: intermediate
4626 
4627   Note:
4628   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4629   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4630   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4631   local column numbers to global column numbers in the original matrix.
4632 
4633   Fortran Notes:
4634   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4635 
4636 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4637 @*/
4638 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4639 {
4640   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4641   PetscBool   flg;
4642 
4643   PetscFunctionBegin;
4644   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4645   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4646   if (Ad) *Ad = a->A;
4647   if (Ao) *Ao = a->B;
4648   if (colmap) *colmap = a->garray;
4649   PetscFunctionReturn(PETSC_SUCCESS);
4650 }
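
/*
   Editorial sketch (not part of the PETSc source): inspecting the local blocks of
   a MATMPIAIJ matrix; colmap translates the compact local column numbering of Ao
   back to global column numbers, as described in the note above.
*/
static PetscErrorCode ExampleInspectLocalBlocks(Mat A)
{
  Mat             Ad, Ao;
  const PetscInt *colmap;
  PetscInt        m, n, nghost;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
  PetscCall(MatGetSize(Ad, &m, &n));        /* local diagonal block is m x n */
  PetscCall(MatGetSize(Ao, NULL, &nghost)); /* number of nonzero off-process columns */
  if (nghost) PetscCall(PetscPrintf(PETSC_COMM_SELF, "first ghost column is global column %" PetscInt_FMT "\n", colmap[0]));
  PetscFunctionReturn(PETSC_SUCCESS);
}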
4651 
4652 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4653 {
4654   PetscInt     m, N, i, rstart, nnz, Ii;
4655   PetscInt    *indx;
4656   PetscScalar *values;
4657   MatType      rootType;
4658 
4659   PetscFunctionBegin;
4660   PetscCall(MatGetSize(inmat, &m, &N));
4661   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4662     PetscInt *dnz, *onz, sum, bs, cbs;
4663 
4664     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4665     /* Check sum(n) = N */
4666     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4667     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4668 
4669     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4670     rstart -= m;
4671 
4672     MatPreallocateBegin(comm, m, n, dnz, onz);
4673     for (i = 0; i < m; i++) {
4674       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4675       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4676       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4677     }
4678 
4679     PetscCall(MatCreate(comm, outmat));
4680     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4681     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4682     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4683     PetscCall(MatGetRootType_Private(inmat, &rootType));
4684     PetscCall(MatSetType(*outmat, rootType));
4685     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4686     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4687     MatPreallocateEnd(dnz, onz);
4688     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4689   }
4690 
4691   /* numeric phase */
4692   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4693   for (i = 0; i < m; i++) {
4694     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4695     Ii = i + rstart;
4696     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4697     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4698   }
4699   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4700   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4701   PetscFunctionReturn(PETSC_SUCCESS);
4702 }
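
/*
   Editorial sketch (not part of the PETSc source): the implementation above is
   normally reached through the public MatCreateMPIMatConcatenateSeqMat(), which
   stacks each rank's sequential matrix into one parallel matrix.
*/
static PetscErrorCode ExampleConcatenateSeqMats(MPI_Comm comm, Mat seqA, Mat *C)
{
  PetscFunctionBegin;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(comm, seqA, PETSC_DECIDE, MAT_INITIAL_MATRIX, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}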
4703 
4704 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4705 {
4706   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4707 
4708   PetscFunctionBegin;
4709   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4710   PetscCall(PetscFree(merge->id_r));
4711   PetscCall(PetscFree(merge->len_s));
4712   PetscCall(PetscFree(merge->len_r));
4713   PetscCall(PetscFree(merge->bi));
4714   PetscCall(PetscFree(merge->bj));
4715   PetscCall(PetscFree(merge->buf_ri[0]));
4716   PetscCall(PetscFree(merge->buf_ri));
4717   PetscCall(PetscFree(merge->buf_rj[0]));
4718   PetscCall(PetscFree(merge->buf_rj));
4719   PetscCall(PetscFree(merge->coi));
4720   PetscCall(PetscFree(merge->coj));
4721   PetscCall(PetscFree(merge->owners_co));
4722   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4723   PetscCall(PetscFree(merge));
4724   PetscFunctionReturn(PETSC_SUCCESS);
4725 }
4726 
4727 #include <../src/mat/utils/freespace.h>
4728 #include <petscbt.h>
4729 
4730 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4731 {
4732   MPI_Comm             comm;
4733   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4734   PetscMPIInt          size, rank, taga, *len_s;
4735   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4736   PetscMPIInt          proc, k;
4737   PetscInt           **buf_ri, **buf_rj;
4738   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4739   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4740   MPI_Request         *s_waits, *r_waits;
4741   MPI_Status          *status;
4742   const MatScalar     *aa, *a_a;
4743   MatScalar          **abuf_r, *ba_i;
4744   Mat_Merge_SeqsToMPI *merge;
4745   PetscContainer       container;
4746 
4747   PetscFunctionBegin;
4748   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4749   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4750 
4751   PetscCallMPI(MPI_Comm_size(comm, &size));
4752   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4753 
4754   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4755   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4756   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4757   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4758   aa = a_a;
4759 
4760   bi     = merge->bi;
4761   bj     = merge->bj;
4762   buf_ri = merge->buf_ri;
4763   buf_rj = merge->buf_rj;
4764 
4765   PetscCall(PetscMalloc1(size, &status));
4766   owners = merge->rowmap->range;
4767   len_s  = merge->len_s;
4768 
4769   /* send and recv matrix values */
4770   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4771   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4772 
4773   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4774   for (proc = 0, k = 0; proc < size; proc++) {
4775     if (!len_s[proc]) continue;
4776     i = owners[proc];
4777     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4778     k++;
4779   }
4780 
4781   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4782   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4783   PetscCall(PetscFree(status));
4784 
4785   PetscCall(PetscFree(s_waits));
4786   PetscCall(PetscFree(r_waits));
4787 
4788   /* insert mat values of mpimat */
4789   PetscCall(PetscMalloc1(N, &ba_i));
4790   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4791 
4792   for (k = 0; k < merge->nrecv; k++) {
4793     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4794     nrows       = *buf_ri_k[k];
4795     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4796     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4797   }
4798 
4799   /* set values of ba */
4800   m = merge->rowmap->n;
4801   for (i = 0; i < m; i++) {
4802     arow = owners[rank] + i;
4803     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4804     bnzi = bi[i + 1] - bi[i];
4805     PetscCall(PetscArrayzero(ba_i, bnzi));
4806 
4807     /* add local non-zero vals of this proc's seqmat into ba */
4808     anzi   = ai[arow + 1] - ai[arow];
4809     aj     = a->j + ai[arow];
4810     aa     = a_a + ai[arow];
4811     nextaj = 0;
4812     for (j = 0; nextaj < anzi; j++) {
4813       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4814         ba_i[j] += aa[nextaj++];
4815       }
4816     }
4817 
4818     /* add received vals into ba */
4819     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4820       /* i-th row */
4821       if (i == *nextrow[k]) {
4822         anzi   = *(nextai[k] + 1) - *nextai[k];
4823         aj     = buf_rj[k] + *nextai[k];
4824         aa     = abuf_r[k] + *nextai[k];
4825         nextaj = 0;
4826         for (j = 0; nextaj < anzi; j++) {
4827           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4828             ba_i[j] += aa[nextaj++];
4829           }
4830         }
4831         nextrow[k]++;
4832         nextai[k]++;
4833       }
4834     }
4835     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4836   }
4837   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4838   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4839   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4840 
4841   PetscCall(PetscFree(abuf_r[0]));
4842   PetscCall(PetscFree(abuf_r));
4843   PetscCall(PetscFree(ba_i));
4844   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4845   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4846   PetscFunctionReturn(PETSC_SUCCESS);
4847 }
4848 
4849 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4850 {
4851   Mat                  B_mpi;
4852   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4853   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4854   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4855   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4856   PetscInt             len, *dnz, *onz, bs, cbs;
4857   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4858   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4859   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4860   MPI_Status          *status;
4861   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4862   PetscBT              lnkbt;
4863   Mat_Merge_SeqsToMPI *merge;
4864   PetscContainer       container;
4865 
4866   PetscFunctionBegin;
4867   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4868 
4869   /* make sure it is a PETSc comm */
4870   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4871   PetscCallMPI(MPI_Comm_size(comm, &size));
4872   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4873 
4874   PetscCall(PetscNew(&merge));
4875   PetscCall(PetscMalloc1(size, &status));
4876 
4877   /* determine row ownership */
4878   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4879   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4880   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4881   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4882   PetscCall(PetscLayoutSetUp(merge->rowmap));
4883   PetscCall(PetscMalloc1(size, &len_si));
4884   PetscCall(PetscMalloc1(size, &merge->len_s));
4885 
4886   m      = merge->rowmap->n;
4887   owners = merge->rowmap->range;
4888 
4889   /* determine the number of messages to send, their lengths */
4890   len_s = merge->len_s;
4891 
4892   len          = 0; /* length of buf_si[] */
4893   merge->nsend = 0;
4894   for (PetscMPIInt proc = 0; proc < size; proc++) {
4895     len_si[proc] = 0;
4896     if (proc == rank) {
4897       len_s[proc] = 0;
4898     } else {
4899       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4900       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4901     }
4902     if (len_s[proc]) {
4903       merge->nsend++;
4904       nrows = 0;
4905       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4906         if (ai[i + 1] > ai[i]) nrows++;
4907       }
4908       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4909       len += len_si[proc];
4910     }
4911   }
4912 
4913   /* determine the number and length of messages to receive for ij-structure */
4914   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4915   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4916 
4917   /* post the Irecv of j-structure */
4918   PetscCall(PetscCommGetNewTag(comm, &tagj));
4919   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4920 
4921   /* post the Isend of j-structure */
4922   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4923 
4924   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4925     if (!len_s[proc]) continue;
4926     i = owners[proc];
4927     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4928     k++;
4929   }
4930 
4931   /* receives and sends of j-structure are complete */
4932   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4933   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4934 
4935   /* send and recv i-structure */
4936   PetscCall(PetscCommGetNewTag(comm, &tagi));
4937   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4938 
4939   PetscCall(PetscMalloc1(len + 1, &buf_s));
4940   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4941   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4942     if (!len_s[proc]) continue;
4943     /* form outgoing message for i-structure:
4944          buf_si[0]:                 nrows to be sent
4945                [1:nrows]:           row index (global)
4946                [nrows+1:2*nrows+1]: i-structure index
4947     */
4948     nrows       = len_si[proc] / 2 - 1;
4949     buf_si_i    = buf_si + nrows + 1;
4950     buf_si[0]   = nrows;
4951     buf_si_i[0] = 0;
4952     nrows       = 0;
4953     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4954       anzi = ai[i + 1] - ai[i];
4955       if (anzi) {
4956         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4957         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4958         nrows++;
4959       }
4960     }
4961     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4962     k++;
4963     buf_si += len_si[proc];
4964   }
4965 
4966   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4967   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4968 
4969   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4970   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4971 
4972   PetscCall(PetscFree(len_si));
4973   PetscCall(PetscFree(len_ri));
4974   PetscCall(PetscFree(rj_waits));
4975   PetscCall(PetscFree2(si_waits, sj_waits));
4976   PetscCall(PetscFree(ri_waits));
4977   PetscCall(PetscFree(buf_s));
4978   PetscCall(PetscFree(status));
4979 
4980   /* compute a local seq matrix in each processor */
4981   /* allocate bi array and free space for accumulating nonzero column info */
4982   PetscCall(PetscMalloc1(m + 1, &bi));
4983   bi[0] = 0;
4984 
4985   /* create and initialize a linked list */
4986   nlnk = N + 1;
4987   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4988 
4989   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4990   len = ai[owners[rank + 1]] - ai[owners[rank]];
4991   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4992 
4993   current_space = free_space;
4994 
4995   /* determine symbolic info for each local row */
4996   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4997 
4998   for (k = 0; k < merge->nrecv; k++) {
4999     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5000     nrows       = *buf_ri_k[k];
5001     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5002     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5003   }
5004 
5005   MatPreallocateBegin(comm, m, n, dnz, onz);
5006   len = 0;
5007   for (i = 0; i < m; i++) {
5008     bnzi = 0;
5009     /* add local non-zero cols of this proc's seqmat into lnk */
5010     arow = owners[rank] + i;
5011     anzi = ai[arow + 1] - ai[arow];
5012     aj   = a->j + ai[arow];
5013     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5014     bnzi += nlnk;
5015     /* add received col data into lnk */
5016     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5017       if (i == *nextrow[k]) {            /* i-th row */
5018         anzi = *(nextai[k] + 1) - *nextai[k];
5019         aj   = buf_rj[k] + *nextai[k];
5020         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5021         bnzi += nlnk;
5022         nextrow[k]++;
5023         nextai[k]++;
5024       }
5025     }
5026     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5027 
5028     /* if free space is not available, make more free space */
5029     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5030     /* copy data into free space, then initialize lnk */
5031     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5032     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5033 
5034     current_space->array += bnzi;
5035     current_space->local_used += bnzi;
5036     current_space->local_remaining -= bnzi;
5037 
5038     bi[i + 1] = bi[i] + bnzi;
5039   }
5040 
5041   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5042 
5043   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5044   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5045   PetscCall(PetscLLDestroy(lnk, lnkbt));
5046 
5047   /* create symbolic parallel matrix B_mpi */
5048   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5049   PetscCall(MatCreate(comm, &B_mpi));
5050   if (n == PETSC_DECIDE) {
5051     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5052   } else {
5053     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5054   }
5055   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5056   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5057   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5058   MatPreallocateEnd(dnz, onz);
5059   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5060 
5061   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5062   B_mpi->assembled = PETSC_FALSE;
5063   merge->bi        = bi;
5064   merge->bj        = bj;
5065   merge->buf_ri    = buf_ri;
5066   merge->buf_rj    = buf_rj;
5067   merge->coi       = NULL;
5068   merge->coj       = NULL;
5069   merge->owners_co = NULL;
5070 
5071   PetscCall(PetscCommDestroy(&comm));
5072 
5073   /* attach the supporting struct to B_mpi for reuse */
5074   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5075   PetscCall(PetscContainerSetPointer(container, merge));
5076   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5077   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5078   PetscCall(PetscContainerDestroy(&container));
5079   *mpimat = B_mpi;
5080 
5081   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5082   PetscFunctionReturn(PETSC_SUCCESS);
5083 }
5084 
5085 /*@
5086   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5087   matrices from each processor
5088 
5089   Collective
5090 
5091   Input Parameters:
5092 + comm   - the communicator that the parallel matrix will live on
5093 . seqmat - the input sequential matrix on each MPI process
5094 . m      - number of local rows (or `PETSC_DECIDE`)
5095 . n      - number of local columns (or `PETSC_DECIDE`)
5096 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5097 
5098   Output Parameter:
5099 . mpimat - the parallel matrix generated
5100 
5101   Level: advanced
5102 
5103   Note:
5104   The dimensions of the sequential matrix on each process MUST be the same.
5105   The input `seqmat` is stored in the container "Mat_Merge_SeqsToMPI", and will be
5106   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5107 
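  Example Usage:
  A minimal sketch, assuming each MPI process holds an assembled `MATSEQAIJ` matrix seqmat of identical global dimensions:
.vb
  Mat mpimat;

  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat));
  /* after changing the numerical values of seqmat (same nonzero pattern), reuse the structure */
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat));
  PetscCall(MatDestroy(&mpimat));
.ve
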
5108 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5109 @*/
5110 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5111 {
5112   PetscMPIInt size;
5113 
5114   PetscFunctionBegin;
5115   PetscCallMPI(MPI_Comm_size(comm, &size));
5116   if (size == 1) {
5117     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5118     if (scall == MAT_INITIAL_MATRIX) {
5119       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5120     } else {
5121       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5122     }
5123     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5124     PetscFunctionReturn(PETSC_SUCCESS);
5125   }
5126   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5127   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5128   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5129   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5130   PetscFunctionReturn(PETSC_SUCCESS);
5131 }
5132 
5133 /*@
5134   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5135 
5136   Not Collective
5137 
5138   Input Parameter:
5139 . A - the matrix
5140 
5141   Output Parameter:
5142 . A_loc - the local sequential matrix generated
5143 
5144   Level: developer
5145 
5146   Notes:
5147   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5148   with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with
5149   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5150
5151   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5152 
5153   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5154 
5155   Destroy the matrix with `MatDestroy()`
5156 
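  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATAIJ` matrix:
.vb
  Mat Aloc;

  PetscCall(MatAIJGetLocalMat(A, &Aloc));
  /* ... use Aloc as a sequential matrix ... */
  PetscCall(MatDestroy(&Aloc));
.ve
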
5157 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5158 @*/
5159 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5160 {
5161   PetscBool mpi;
5162 
5163   PetscFunctionBegin;
5164   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5165   if (mpi) {
5166     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5167   } else {
5168     *A_loc = A;
5169     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5170   }
5171   PetscFunctionReturn(PETSC_SUCCESS);
5172 }
5173 
5174 /*@
5175   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5176 
5177   Not Collective
5178 
5179   Input Parameters:
5180 + A     - the matrix
5181 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5182 
5183   Output Parameter:
5184 . A_loc - the local sequential matrix generated
5185 
5186   Level: developer
5187 
5188   Notes:
5189   The matrix is created by taking all of `A`'s local rows and putting them into a sequential
5190   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5191   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5192
5193   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5194 
5195   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5196   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5197   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5198   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5199 
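  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix:
.vb
  Mat Aloc;

  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
  /* ... after the values of A change (same nonzero pattern), refresh Aloc ... */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc));
  PetscCall(MatDestroy(&Aloc));
.ve
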
5200 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5201 @*/
5202 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5203 {
5204   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5205   Mat_SeqAIJ        *mat, *a, *b;
5206   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5207   const PetscScalar *aa, *ba, *aav, *bav;
5208   PetscScalar       *ca, *cam;
5209   PetscMPIInt        size;
5210   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5211   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5212   PetscBool          match;
5213 
5214   PetscFunctionBegin;
5215   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5216   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5217   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5218   if (size == 1) {
5219     if (scall == MAT_INITIAL_MATRIX) {
5220       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5221       *A_loc = mpimat->A;
5222     } else if (scall == MAT_REUSE_MATRIX) {
5223       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5224     }
5225     PetscFunctionReturn(PETSC_SUCCESS);
5226   }
5227 
5228   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5229   a  = (Mat_SeqAIJ *)mpimat->A->data;
5230   b  = (Mat_SeqAIJ *)mpimat->B->data;
5231   ai = a->i;
5232   aj = a->j;
5233   bi = b->i;
5234   bj = b->j;
5235   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5236   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5237   aa = aav;
5238   ba = bav;
5239   if (scall == MAT_INITIAL_MATRIX) {
5240     PetscCall(PetscMalloc1(1 + am, &ci));
5241     ci[0] = 0;
5242     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5243     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5244     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5245     k = 0;
5246     for (i = 0; i < am; i++) {
5247       ncols_o = bi[i + 1] - bi[i];
5248       ncols_d = ai[i + 1] - ai[i];
5249       /* off-diagonal portion of A */
5250       for (jo = 0; jo < ncols_o; jo++) {
5251         col = cmap[*bj];
5252         if (col >= cstart) break;
5253         cj[k] = col;
5254         bj++;
5255         ca[k++] = *ba++;
5256       }
5257       /* diagonal portion of A */
5258       for (j = 0; j < ncols_d; j++) {
5259         cj[k]   = cstart + *aj++;
5260         ca[k++] = *aa++;
5261       }
5262       /* off-diagonal portion of A */
5263       for (j = jo; j < ncols_o; j++) {
5264         cj[k]   = cmap[*bj++];
5265         ca[k++] = *ba++;
5266       }
5267     }
5268     /* put together the new matrix */
5269     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5270     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5271     /* Since these are PETSc arrays, change flags to free them as necessary. */
5272     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5273     mat->free_a  = PETSC_TRUE;
5274     mat->free_ij = PETSC_TRUE;
5275     mat->nonew   = 0;
5276   } else if (scall == MAT_REUSE_MATRIX) {
5277     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5278     ci  = mat->i;
5279     cj  = mat->j;
5280     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5281     for (i = 0; i < am; i++) {
5282       /* off-diagonal portion of A */
5283       ncols_o = bi[i + 1] - bi[i];
5284       for (jo = 0; jo < ncols_o; jo++) {
5285         col = cmap[*bj];
5286         if (col >= cstart) break;
5287         *cam++ = *ba++;
5288         bj++;
5289       }
5290       /* diagonal portion of A */
5291       ncols_d = ai[i + 1] - ai[i];
5292       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5293       /* off-diagonal portion of A */
5294       for (j = jo; j < ncols_o; j++) {
5295         *cam++ = *ba++;
5296         bj++;
5297       }
5298     }
5299     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5300   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5301   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5302   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5303   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5304   PetscFunctionReturn(PETSC_SUCCESS);
5305 }
5306 
5307 /*@
5308   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5309   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5310 
5311   Not Collective
5312 
5313   Input Parameters:
5314 + A     - the matrix
5315 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5316 
5317   Output Parameters:
5318 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5319 - A_loc - the local sequential matrix generated
5320 
5321   Level: developer
5322 
5323   Note:
5324   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5325   part, followed by those associated with the off-diagonal part (in its local ordering).
5326 
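  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix:
.vb
  Mat Aloc;
  IS  glob;

  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &Aloc));
  /* entry c of glob is the global column of A corresponding to column c of Aloc */
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&Aloc));
.ve
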
5327 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5328 @*/
5329 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5330 {
5331   Mat             Ao, Ad;
5332   const PetscInt *cmap;
5333   PetscMPIInt     size;
5334   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5335 
5336   PetscFunctionBegin;
5337   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5338   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5339   if (size == 1) {
5340     if (scall == MAT_INITIAL_MATRIX) {
5341       PetscCall(PetscObjectReference((PetscObject)Ad));
5342       *A_loc = Ad;
5343     } else if (scall == MAT_REUSE_MATRIX) {
5344       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5345     }
5346     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5347     PetscFunctionReturn(PETSC_SUCCESS);
5348   }
5349   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5350   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5351   if (f) {
5352     PetscCall((*f)(A, scall, glob, A_loc));
5353   } else {
5354     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5355     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5356     Mat_SeqAIJ        *c;
5357     PetscInt          *ai = a->i, *aj = a->j;
5358     PetscInt          *bi = b->i, *bj = b->j;
5359     PetscInt          *ci, *cj;
5360     const PetscScalar *aa, *ba;
5361     PetscScalar       *ca;
5362     PetscInt           i, j, am, dn, on;
5363 
5364     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5365     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5366     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5367     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5368     if (scall == MAT_INITIAL_MATRIX) {
5369       PetscInt k;
5370       PetscCall(PetscMalloc1(1 + am, &ci));
5371       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5372       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5373       ci[0] = 0;
5374       for (i = 0, k = 0; i < am; i++) {
5375         const PetscInt ncols_o = bi[i + 1] - bi[i];
5376         const PetscInt ncols_d = ai[i + 1] - ai[i];
5377         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5378         /* diagonal portion of A */
5379         for (j = 0; j < ncols_d; j++, k++) {
5380           cj[k] = *aj++;
5381           ca[k] = *aa++;
5382         }
5383         /* off-diagonal portion of A */
5384         for (j = 0; j < ncols_o; j++, k++) {
5385           cj[k] = dn + *bj++;
5386           ca[k] = *ba++;
5387         }
5388       }
5389       /* put together the new matrix */
5390       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5391       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5392       /* Since these are PETSc arrays, change flags to free them as necessary. */
5393       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5394       c->free_a  = PETSC_TRUE;
5395       c->free_ij = PETSC_TRUE;
5396       c->nonew   = 0;
5397       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5398     } else if (scall == MAT_REUSE_MATRIX) {
5399       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5400       for (i = 0; i < am; i++) {
5401         const PetscInt ncols_d = ai[i + 1] - ai[i];
5402         const PetscInt ncols_o = bi[i + 1] - bi[i];
5403         /* diagonal portion of A */
5404         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5405         /* off-diagonal portion of A */
5406         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5407       }
5408       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5409     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5410     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5411     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5412     if (glob) {
5413       PetscInt cst, *gidx;
5414 
5415       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5416       PetscCall(PetscMalloc1(dn + on, &gidx));
5417       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5418       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5419       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5420     }
5421   }
5422   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5423   PetscFunctionReturn(PETSC_SUCCESS);
5424 }
5425 
5426 /*@C
5427   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5428 
5429   Not Collective
5430 
5431   Input Parameters:
5432 + A     - the matrix
5433 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5434 . row   - index set of rows to extract (or `NULL`)
5435 - col   - index set of columns to extract (or `NULL`)
5436 
5437   Output Parameter:
5438 . A_loc - the local sequential matrix generated
5439 
5440   Level: developer
5441 
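  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix and all local rows and nonzero columns are wanted:
.vb
  Mat Aloc;

  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &Aloc));
  /* ... use Aloc ... */
  PetscCall(MatDestroy(&Aloc));
.ve
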
5442 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5443 @*/
5444 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5445 {
5446   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5447   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5448   IS          isrowa, iscola;
5449   Mat        *aloc;
5450   PetscBool   match;
5451 
5452   PetscFunctionBegin;
5453   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5454   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5455   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5456   if (!row) {
5457     start = A->rmap->rstart;
5458     end   = A->rmap->rend;
5459     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5460   } else {
5461     isrowa = *row;
5462   }
5463   if (!col) {
5464     start = A->cmap->rstart;
5465     cmap  = a->garray;
5466     nzA   = a->A->cmap->n;
5467     nzB   = a->B->cmap->n;
5468     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5469     ncols = 0;
5470     for (i = 0; i < nzB; i++) {
5471       if (cmap[i] < start) idx[ncols++] = cmap[i];
5472       else break;
5473     }
5474     imark = i;
5475     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5476     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5477     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5478   } else {
5479     iscola = *col;
5480   }
5481   if (scall != MAT_INITIAL_MATRIX) {
5482     PetscCall(PetscMalloc1(1, &aloc));
5483     aloc[0] = *A_loc;
5484   }
5485   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5486   if (!col) { /* attach global id of condensed columns */
5487     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5488   }
5489   *A_loc = aloc[0];
5490   PetscCall(PetscFree(aloc));
5491   if (!row) PetscCall(ISDestroy(&isrowa));
5492   if (!col) PetscCall(ISDestroy(&iscola));
5493   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5494   PetscFunctionReturn(PETSC_SUCCESS);
5495 }
5496 
5497 /*
5498  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5499  * Rows can be local or remote. The routine is designed to be scalable in memory, so nothing is sized
5500  * by the global dimensions.
5501  * */
5502 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5503 {
5504   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5505   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5506   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5507   PetscMPIInt            owner;
5508   PetscSFNode           *iremote, *oiremote;
5509   const PetscInt        *lrowindices;
5510   PetscSF                sf, osf;
5511   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5512   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5513   MPI_Comm               comm;
5514   ISLocalToGlobalMapping mapping;
5515   const PetscScalar     *pd_a, *po_a;
5516 
5517   PetscFunctionBegin;
5518   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5519   /* plocalsize is the number of roots
5520    * nrows is the number of leaves
5521    * */
5522   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5523   PetscCall(ISGetLocalSize(rows, &nrows));
5524   PetscCall(PetscCalloc1(nrows, &iremote));
5525   PetscCall(ISGetIndices(rows, &lrowindices));
5526   for (i = 0; i < nrows; i++) {
5527     /* Find a remote index and an owner for a row
5528      * The row could be local or remote
5529      * */
5530     owner = 0;
5531     lidx  = 0;
5532     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5533     iremote[i].index = lidx;
5534     iremote[i].rank  = owner;
5535   }
5536   /* Create SF to communicate how many nonzero columns for each row */
5537   PetscCall(PetscSFCreate(comm, &sf));
5538   /* SF will figure out the number of nonzero columns for each row, and their
5539    * offsets
5540    * */
5541   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5542   PetscCall(PetscSFSetFromOptions(sf));
5543   PetscCall(PetscSFSetUp(sf));
5544 
5545   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5546   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5547   PetscCall(PetscCalloc1(nrows, &pnnz));
5548   roffsets[0] = 0;
5549   roffsets[1] = 0;
5550   for (i = 0; i < plocalsize; i++) {
5551     /* diagonal */
5552     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5553     /* off-diagonal */
5554     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5555     /* compute offsets so that we know the relative location of each row */
5556     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5557     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5558   }
5559   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5560   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5561   /* 'r' means root, and 'l' means leaf */
5562   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5563   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5564   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5565   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5566   PetscCall(PetscSFDestroy(&sf));
5567   PetscCall(PetscFree(roffsets));
5568   PetscCall(PetscFree(nrcols));
5569   dntotalcols = 0;
5570   ontotalcols = 0;
5571   ncol        = 0;
5572   for (i = 0; i < nrows; i++) {
5573     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5574     ncol    = PetscMax(pnnz[i], ncol);
5575     /* diagonal */
5576     dntotalcols += nlcols[i * 2 + 0];
5577     /* off-diagonal */
5578     ontotalcols += nlcols[i * 2 + 1];
5579   }
5580   /* We do not need to figure out the exact number of columns
5581    * since all the calculations are done by going through the raw data
5582    * */
5583   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5584   PetscCall(MatSetUp(*P_oth));
5585   PetscCall(PetscFree(pnnz));
5586   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5587   /* diagonal */
5588   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5589   /* off-diagonal */
5590   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5591   /* diagonal */
5592   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5593   /* off-diagonal */
5594   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5595   dntotalcols = 0;
5596   ontotalcols = 0;
5597   ntotalcols  = 0;
5598   for (i = 0; i < nrows; i++) {
5599     owner = 0;
5600     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5601     /* Set iremote for diag matrix */
5602     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5603       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5604       iremote[dntotalcols].rank  = owner;
5605       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5606       ilocal[dntotalcols++] = ntotalcols++;
5607     }
5608     /* off-diagonal */
5609     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5610       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5611       oiremote[ontotalcols].rank  = owner;
5612       oilocal[ontotalcols++]      = ntotalcols++;
5613     }
5614   }
5615   PetscCall(ISRestoreIndices(rows, &lrowindices));
5616   PetscCall(PetscFree(loffsets));
5617   PetscCall(PetscFree(nlcols));
5618   PetscCall(PetscSFCreate(comm, &sf));
5619   /* P serves as the roots and P_oth as the leaves
5620    * Diagonal matrix
5621    * */
5622   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5623   PetscCall(PetscSFSetFromOptions(sf));
5624   PetscCall(PetscSFSetUp(sf));
5625 
5626   PetscCall(PetscSFCreate(comm, &osf));
5627   /* off-diagonal */
5628   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5629   PetscCall(PetscSFSetFromOptions(osf));
5630   PetscCall(PetscSFSetUp(osf));
5631   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5632   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5633   /* operate on the matrix internal data to save memory */
5634   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5635   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5636   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5637   /* Convert to global indices for diag matrix */
5638   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5639   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5640   /* We want P_oth to store global indices */
5641   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5642   /* Use a memory-scalable approach */
5643   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5644   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5645   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5646   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5647   /* Convert back to local indices */
5648   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5649   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5650   nout = 0;
5651   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5652   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
5653   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5654   /* Exchange values */
5655   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5656   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5657   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5658   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5659   /* Stop PETSc from shrinking memory */
5660   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5661   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5662   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5663   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5664   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5665   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5666   PetscCall(PetscSFDestroy(&sf));
5667   PetscCall(PetscSFDestroy(&osf));
5668   PetscFunctionReturn(PETSC_SUCCESS);
5669 }
5670 
5671 /*
5672  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A.
5673  * This supports MPIAIJ and MAIJ.
5674  * */
5675 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5676 {
5677   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5678   Mat_SeqAIJ *p_oth;
5679   IS          rows, map;
5680   PetscHMapI  hamp;
5681   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5682   MPI_Comm    comm;
5683   PetscSF     sf, osf;
5684   PetscBool   has;
5685 
5686   PetscFunctionBegin;
5687   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5688   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5689   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5690    *  and then create a submatrix (which is often an overlapping matrix)
5691    * */
5692   if (reuse == MAT_INITIAL_MATRIX) {
5693     /* Use a hash table to figure out unique keys */
5694     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5695     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5696     count = 0;
5697     /* Assume that a->garray is sorted; otherwise the following does not make sense */
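    /* For example (hypothetical values): with dof = 2 and a->garray = {4, 5, 8}, the keys are {2, 2, 4},
       giving mapping = {0, 0, 1} and, after sorting the hash-map keys, rows = {2, 4} */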
5698     for (i = 0; i < a->B->cmap->n; i++) {
5699       key = a->garray[i] / dof;
5700       PetscCall(PetscHMapIHas(hamp, key, &has));
5701       if (!has) {
5702         mapping[i] = count;
5703         PetscCall(PetscHMapISet(hamp, key, count++));
5704       } else {
5705         /* Current 'i' maps to the same key as the previous entry */
5706         mapping[i] = count - 1;
5707       }
5708     }
5709     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5710     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5711     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5712     PetscCall(PetscCalloc1(htsize, &rowindices));
5713     off = 0;
5714     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5715     PetscCall(PetscHMapIDestroy(&hamp));
5716     PetscCall(PetscSortInt(htsize, rowindices));
5717     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5718     /* In case the matrix was already created and the user wants to recreate it */
5719     PetscCall(MatDestroy(P_oth));
5720     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5721     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5722     PetscCall(ISDestroy(&map));
5723     PetscCall(ISDestroy(&rows));
5724   } else if (reuse == MAT_REUSE_MATRIX) {
5725     /* If the matrix was already created, we simply update the values using the SF objects
5726      * that were attached to the matrix earlier.
5727      */
5728     const PetscScalar *pd_a, *po_a;
5729 
5730     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5731     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5732     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5733     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5734     /* Update values in place */
5735     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5736     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5737     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5738     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5739     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5740     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5741     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5742     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5743   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5744   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5745   PetscFunctionReturn(PETSC_SUCCESS);
5746 }
5747 
5748 /*@C
5749   MatGetBrowsOfAcols - Returns index sets containing the rows of `B` that correspond to the nonzero columns of the local `A`
5750 
5751   Collective
5752 
5753   Input Parameters:
5754 + A     - the first matrix in `MATMPIAIJ` format
5755 . B     - the second matrix in `MATMPIAIJ` format
5756 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5757 
5758   Output Parameters:
5759 + rowb  - on input, the index set of rows of `B` to extract (or `NULL`); modified on output
5760 . colb  - on input, the index set of columns of `B` to extract (or `NULL`); modified on output
5761 - B_seq - the sequential matrix generated
5762 
5763   Level: developer
5764 
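  Example Usage:
  A minimal sketch, assuming `A` and `B` are assembled `MATMPIAIJ` matrices with compatible layouts:
.vb
  IS  rowb = NULL, colb = NULL;
  Mat Bseq = NULL;

  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &Bseq));
  /* ... after the values of B change (same nonzero pattern), refresh Bseq ... */
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &Bseq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&Bseq));
.ve
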
5765 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5766 @*/
5767 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5768 {
5769   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5770   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5771   IS          isrowb, iscolb;
5772   Mat        *bseq = NULL;
5773 
5774   PetscFunctionBegin;
5775   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5776              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5777   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5778 
5779   if (scall == MAT_INITIAL_MATRIX) {
5780     start = A->cmap->rstart;
5781     cmap  = a->garray;
5782     nzA   = a->A->cmap->n;
5783     nzB   = a->B->cmap->n;
5784     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5785     ncols = 0;
5786     for (i = 0; i < nzB; i++) { /* row < local row index */
5787       if (cmap[i] < start) idx[ncols++] = cmap[i];
5788       else break;
5789     }
5790     imark = i;
5791     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5792     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5793     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5794     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5795   } else {
5796     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5797     isrowb = *rowb;
5798     iscolb = *colb;
5799     PetscCall(PetscMalloc1(1, &bseq));
5800     bseq[0] = *B_seq;
5801   }
5802   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5803   *B_seq = bseq[0];
5804   PetscCall(PetscFree(bseq));
5805   if (!rowb) {
5806     PetscCall(ISDestroy(&isrowb));
5807   } else {
5808     *rowb = isrowb;
5809   }
5810   if (!colb) {
5811     PetscCall(ISDestroy(&iscolb));
5812   } else {
5813     *colb = iscolb;
5814   }
5815   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5816   PetscFunctionReturn(PETSC_SUCCESS);
5817 }
5818 
5819 /*
5820     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
5821     of the OFF-DIAGONAL portion of local A
5822 
5823     Collective
5824 
5825    Input Parameters:
5826 +    A,B - the matrices in `MATMPIAIJ` format
5827 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5828 
5829    Output Parameters:
5830 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5831 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5832 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5833 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5834 
5835     Developer Note:
5836     This directly accesses information inside the VecScatter associated with the matrix-vector product
5837     for this matrix. This is not desirable.
5838 
5839     Level: developer
5840 
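    Example usage (a sketch; the first call builds the communication structure, later calls reuse it):

      PetscInt  *startsj_s = NULL, *startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat        B_oth;

      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
      /* ... after the values of B change (same nonzero pattern) ... */
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
      /* the caller keeps startsj_s, startsj_r and bufa between calls and frees them when done */
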
5841 */
5842 
5843 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5844 {
5845   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5846   VecScatter         ctx;
5847   MPI_Comm           comm;
5848   const PetscMPIInt *rprocs, *sprocs;
5849   PetscMPIInt        nrecvs, nsends;
5850   const PetscInt    *srow, *rstarts, *sstarts;
5851   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5852   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5853   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5854   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5855   PetscMPIInt        size, tag, rank, nreqs;
5856 
5857   PetscFunctionBegin;
5858   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5859   PetscCallMPI(MPI_Comm_size(comm, &size));
5860 
5861   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5862              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5863   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5864   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5865 
5866   if (size == 1) {
5867     startsj_s = NULL;
5868     bufa_ptr  = NULL;
5869     *B_oth    = NULL;
5870     PetscFunctionReturn(PETSC_SUCCESS);
5871   }
5872 
5873   ctx = a->Mvctx;
5874   tag = ((PetscObject)ctx)->tag;
5875 
5876   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5877   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5878   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5879   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5880   PetscCall(PetscMalloc1(nreqs, &reqs));
5881   rwaits = reqs;
5882   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5883 
5884   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5885   if (scall == MAT_INITIAL_MATRIX) {
5886     /* i-array */
5887     /*  post receives */
5888     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5889     for (i = 0; i < nrecvs; i++) {
5890       rowlen = rvalues + rstarts[i] * rbs;
5891       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5892       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5893     }
5894 
5895     /* pack the outgoing message */
5896     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5897 
5898     sstartsj[0] = 0;
5899     rstartsj[0] = 0;
5900     len         = 0; /* total length of j or a array to be sent */
5901     if (nsends) {
5902       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5903       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5904     }
5905     for (i = 0; i < nsends; i++) {
5906       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5907       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5908       for (j = 0; j < nrows; j++) {
5909         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5910         for (l = 0; l < sbs; l++) {
5911           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5912 
5913           rowlen[j * sbs + l] = ncols;
5914 
5915           len += ncols;
5916           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5917         }
5918         k++;
5919       }
5920       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5921 
5922       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5923     }
5924     /* recvs and sends of i-array are completed */
5925     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5926     PetscCall(PetscFree(svalues));
5927 
5928     /* allocate buffers for sending j and a arrays */
5929     PetscCall(PetscMalloc1(len + 1, &bufj));
5930     PetscCall(PetscMalloc1(len + 1, &bufa));
5931 
5932     /* create i-array of B_oth */
5933     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5934 
5935     b_othi[0] = 0;
5936     len       = 0; /* total length of j or a array to be received */
5937     k         = 0;
5938     for (i = 0; i < nrecvs; i++) {
5939       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5940       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5941       for (j = 0; j < nrows; j++) {
5942         b_othi[k + 1] = b_othi[k] + rowlen[j];
5943         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5944         k++;
5945       }
5946       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5947     }
5948     PetscCall(PetscFree(rvalues));
5949 
5950     /* allocate space for j and a arrays of B_oth */
5951     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5952     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5953 
5954     /* j-array */
5955     /*  post receives of j-array */
5956     for (i = 0; i < nrecvs; i++) {
5957       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5958       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5959     }
5960 
5961     /* pack the outgoing message j-array */
5962     if (nsends) k = sstarts[0];
5963     for (i = 0; i < nsends; i++) {
5964       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5965       bufJ  = bufj + sstartsj[i];
5966       for (j = 0; j < nrows; j++) {
5967         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5968         for (ll = 0; ll < sbs; ll++) {
5969           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5970           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5971           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5972         }
5973       }
5974       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5975     }
5976 
5977     /* recvs and sends of j-array are completed */
5978     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5979   } else if (scall == MAT_REUSE_MATRIX) {
5980     sstartsj = *startsj_s;
5981     rstartsj = *startsj_r;
5982     bufa     = *bufa_ptr;
5983     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5984   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5985 
5986   /* a-array */
5987   /*  post receives of a-array */
5988   for (i = 0; i < nrecvs; i++) {
5989     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5990     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5991   }
5992 
5993   /* pack the outgoing message a-array */
5994   if (nsends) k = sstarts[0];
5995   for (i = 0; i < nsends; i++) {
5996     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5997     bufA  = bufa + sstartsj[i];
5998     for (j = 0; j < nrows; j++) {
5999       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6000       for (ll = 0; ll < sbs; ll++) {
6001         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6002         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6003         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6004       }
6005     }
6006     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6007   }
6008   /* recvs and sends of a-array are completed */
6009   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6010   PetscCall(PetscFree(reqs));
6011 
6012   if (scall == MAT_INITIAL_MATRIX) {
6013     Mat_SeqAIJ *b_oth;
6014 
6015     /* put together the new matrix */
6016     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6017 
6018     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6019     /* Since these are PETSc arrays, change flags to free them as necessary. */
6020     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6021     b_oth->free_a  = PETSC_TRUE;
6022     b_oth->free_ij = PETSC_TRUE;
6023     b_oth->nonew   = 0;
6024 
6025     PetscCall(PetscFree(bufj));
6026     if (!startsj_s || !bufa_ptr) {
6027       PetscCall(PetscFree2(sstartsj, rstartsj));
6028       PetscCall(PetscFree(bufa));
6029     } else {
6030       *startsj_s = sstartsj;
6031       *startsj_r = rstartsj;
6032       *bufa_ptr  = bufa;
6033     }
6034   } else if (scall == MAT_REUSE_MATRIX) {
6035     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6036   }
6037 
6038   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6039   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6040   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6041   PetscFunctionReturn(PETSC_SUCCESS);
6042 }
6043 
6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6047 #if defined(PETSC_HAVE_MKL_SPARSE)
6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6049 #endif
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6052 #if defined(PETSC_HAVE_ELEMENTAL)
6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 #if defined(PETSC_HAVE_SCALAPACK)
6056 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6057 #endif
6058 #if defined(PETSC_HAVE_HYPRE)
6059 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6060 #endif
6061 #if defined(PETSC_HAVE_CUDA)
6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6063 #endif
6064 #if defined(PETSC_HAVE_HIP)
6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6066 #endif
6067 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6069 #endif
6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6071 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6072 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6073 
6074 /*
6075     Computes (B'*A')' since computing A*B directly is untenable
6076 
6077                n                       p                          p
6078         [             ]       [             ]         [                 ]
6079       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6080         [             ]       [             ]         [                 ]
6081 
6082 */
6083 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6084 {
6085   Mat At, Bt, Ct;
6086 
6087   PetscFunctionBegin;
6088   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6089   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6090   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6091   PetscCall(MatDestroy(&At));
6092   PetscCall(MatDestroy(&Bt));
6093   PetscCall(MatTransposeSetPrecursor(Ct, C));
6094   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6095   PetscCall(MatDestroy(&Ct));
6096   PetscFunctionReturn(PETSC_SUCCESS);
6097 }
6098 
6099 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6100 {
6101   PetscBool cisdense;
6102 
6103   PetscFunctionBegin;
6104   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6105   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6106   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6107   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6108   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6109   PetscCall(MatSetUp(C));
6110 
6111   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6112   PetscFunctionReturn(PETSC_SUCCESS);
6113 }
6114 
6115 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6116 {
6117   Mat_Product *product = C->product;
6118   Mat          A = product->A, B = product->B;
6119 
6120   PetscFunctionBegin;
6121   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6122              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6123   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6124   C->ops->productsymbolic = MatProductSymbolic_AB;
6125   PetscFunctionReturn(PETSC_SUCCESS);
6126 }
6127 
6128 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6129 {
6130   Mat_Product *product = C->product;
6131 
6132   PetscFunctionBegin;
6133   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6134   PetscFunctionReturn(PETSC_SUCCESS);
6135 }
6136 
6137 /*
6138    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6139 
6140   Input Parameters:
6141 
6142     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6143     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6144 
6145     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6146 
6147     For Set1, j1[] contains column indices of the nonzeros.
6148     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6149     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6150     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6151 
6152     Similar for Set2.
6153 
6154     This routine merges the two sets of nonzeros row by row and removes repeats.
6155 
6156   Output Parameters: (memory is allocated by the caller)
6157 
6158     i[],j[]: the CSR of the merged matrix, which has m rows.
6159     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6160     imap2[]: similar to imap1[], but for Set2.
6161     Note we order nonzeros row-by-row and from left to right.
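    For example, with m = 1 and
      Set1: j1[] = {1,1,3}, rowBegin1[] = {0}, rowEnd1[] = {3}, jmap1[] = {0,2,3} (column 1 occurs twice)
      Set2: j2[] = {2,3},   rowBegin2[] = {0}, rowEnd2[] = {2}, jmap2[] = {0,1,2}
    the merged CSR is i[] = {0,3}, j[] = {1,2,3}, with imap1[] = {0,2} and imap2[] = {1,2}.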
6162 */
6163 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6164 {
6165   PetscInt   r, m; /* r: row index of mat; m: number of local rows */
6166   PetscCount t, t1, t2, b1, e1, b2, e2;
6167 
6168   PetscFunctionBegin;
6169   PetscCall(MatGetLocalSize(mat, &m, NULL));
6170   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
6171   i[0]        = 0;
6172   for (r = 0; r < m; r++) { /* Do row by row merging */
6173     b1 = rowBegin1[r];
6174     e1 = rowEnd1[r];
6175     b2 = rowBegin2[r];
6176     e2 = rowEnd2[r];
6177     while (b1 < e1 && b2 < e2) {
6178       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6179         j[t]      = j1[b1];
6180         imap1[t1] = t;
6181         imap2[t2] = t;
6182         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */
6183         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */
6184         t1++;
6185         t2++;
6186         t++;
6187       } else if (j1[b1] < j2[b2]) {
6188         j[t]      = j1[b1];
6189         imap1[t1] = t;
6190         b1 += jmap1[t1 + 1] - jmap1[t1];
6191         t1++;
6192         t++;
6193       } else {
6194         j[t]      = j2[b2];
6195         imap2[t2] = t;
6196         b2 += jmap2[t2 + 1] - jmap2[t2];
6197         t2++;
6198         t++;
6199       }
6200     }
6201     /* Merge the remaining in either j1[] or j2[] */
6202     while (b1 < e1) {
6203       j[t]      = j1[b1];
6204       imap1[t1] = t;
6205       b1 += jmap1[t1 + 1] - jmap1[t1];
6206       t1++;
6207       t++;
6208     }
6209     while (b2 < e2) {
6210       j[t]      = j2[b2];
6211       imap2[t2] = t;
6212       b2 += jmap2[t2 + 1] - jmap2[t2];
6213       t2++;
6214       t++;
6215     }
6216     PetscCall(PetscIntCast(t, i + r + 1));
6217   }
6218   PetscFunctionReturn(PETSC_SUCCESS);
6219 }
6220 
6221 /*
6222   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6223 
6224   Input Parameters:
6225     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6226     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6227       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6228 
6229       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6230       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6231 
6232   Output Parameters:
6233     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6234     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6235       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6236       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6237 
6238     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6239       Atot: number of entries belonging to the diagonal block.
6240       Annz: number of unique nonzeros belonging to the diagonal block.
6241       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6242         repeats (i.e., same 'i,j' pair).
6243       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6244         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6245 
6249     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6250 
6251     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6252 */
6253 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6254 {
6255   PetscInt    cstart, cend, rstart, rend, row, col;
6256   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6257   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6258   PetscCount  k, m, p, q, r, s, mid;
6259   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6260 
6261   PetscFunctionBegin;
6262   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6263   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6264   m = rend - rstart;
6265 
6266   /* Skip negative rows */
6267   for (k = 0; k < n; k++)
6268     if (i[k] >= 0) break;
6269 
6270   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6271      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6272   */
6273   while (k < n) {
6274     row = i[k];
6275     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6276     for (s = k; s < n; s++)
6277       if (i[s] != row) break;
6278 
6279     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6280     for (p = k; p < s; p++) {
6281       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6282       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6283     }
6284     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6285     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6286     rowBegin[row - rstart] = k;
6287     rowMid[row - rstart]   = mid;
6288     rowEnd[row - rstart]   = s;
6289 
6290     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6291     Atot += mid - k;
6292     Btot += s - mid;
6293 
6294     /* Count unique nonzeros of this diag row */
6295     for (p = k; p < mid;) {
6296       col = j[p];
6297       do {
6298         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6299         p++;
6300       } while (p < mid && j[p] == col);
6301       Annz++;
6302     }
6303 
6304     /* Count unique nonzeros of this offdiag row */
6305     for (p = mid; p < s;) {
6306       col = j[p];
6307       do {
6308         p++;
6309       } while (p < s && j[p] == col);
6310       Bnnz++;
6311     }
6312     k = s;
6313   }
6314 
6315   /* Allocation according to Atot, Btot, Annz, Bnnz */
6316   PetscCall(PetscMalloc1(Atot, &Aperm));
6317   PetscCall(PetscMalloc1(Btot, &Bperm));
6318   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6319   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6320 
6321   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6322   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6323   for (r = 0; r < m; r++) {
6324     k   = rowBegin[r];
6325     mid = rowMid[r];
6326     s   = rowEnd[r];
6327     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6328     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6329     Atot += mid - k;
6330     Btot += s - mid;
6331 
6332     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6333     for (p = k; p < mid;) {
6334       col = j[p];
6335       q   = p;
6336       do {
6337         p++;
6338       } while (p < mid && j[p] == col);
6339       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6340       Annz++;
6341     }
6342 
6343     for (p = mid; p < s;) {
6344       col = j[p];
6345       q   = p;
6346       do {
6347         p++;
6348       } while (p < s && j[p] == col);
6349       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6350       Bnnz++;
6351     }
6352   }
6353   /* Output */
6354   *Aperm_ = Aperm;
6355   *Annz_  = Annz;
6356   *Atot_  = Atot;
6357   *Ajmap_ = Ajmap;
6358   *Bperm_ = Bperm;
6359   *Bnnz_  = Bnnz;
6360   *Btot_  = Btot;
6361   *Bjmap_ = Bjmap;
6362   PetscFunctionReturn(PETSC_SUCCESS);
6363 }
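/*
  For illustration only: suppose a rank owns columns [cstart,cend) = [4,8) and one local row
  receives the (unsorted, possibly repeated) column indices {9, 5, 5, 2, 7}. After the
  sort-with-shift trick above, the row is partitioned as

    diag (in [4,8)):    5, 5, 7  ->  Atot += 3, Annz += 2, Ajmap[] records repeats {2,1}
    offdiag (outside):  2, 9     ->  Btot += 2, Bnnz += 2, Bjmap[] records repeats {1,1}

  The temporary shift by -PETSC_INT_MAX merely makes diagonal-block columns sort ahead of
  off-diagonal ones; it is undone before the routine returns.
*/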
6364 
6365 /*
6366   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6367 
6368   Input Parameters:
6369     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6370     nnz:  number of unique nonzeros in the merged matrix
6371     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6372     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6373 
6374   Output Parameter: (memory is allocated by the caller)
6375     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6376 
6377   Example:
6378     nnz1 = 4
6379     nnz  = 6
6380     imap = [1,3,4,5]
6381     jmap = [0,3,5,6,7]
6382    then,
6383     jmap_new = [0,0,3,3,5,6,7]
6384 */
6385 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6386 {
6387   PetscCount k, p;
6388 
6389   PetscFunctionBegin;
6390   jmap_new[0] = 0;
6391   p           = nnz;                /* p loops over jmap_new[] backwards */
6392   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6393     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6394   }
6395   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6396   PetscFunctionReturn(PETSC_SUCCESS);
6397 }
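/*
  For illustration only: a tiny standalone check of the expansion with the example values
  from the comment above (hypothetical test code, not part of the library):

    PetscCount imap[] = {1, 3, 4, 5};
    PetscCount jmap[] = {0, 3, 5, 6, 7};
    PetscCount jmap_new[7];

    PetscCall(ExpandJmap_Internal(4, 6, imap, jmap, jmap_new));
    // jmap_new[] = {0, 0, 3, 3, 5, 6, 7}: nonzeros 0 and 2 of the merged matrix have no
    // contribution from this set, so their repeat counts jmap_new[i+1]-jmap_new[i] are zero.
*/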
6398 
6399 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6400 {
6401   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6402 
6403   PetscFunctionBegin;
6404   PetscCall(PetscSFDestroy(&coo->sf));
6405   PetscCall(PetscFree(coo->Aperm1));
6406   PetscCall(PetscFree(coo->Bperm1));
6407   PetscCall(PetscFree(coo->Ajmap1));
6408   PetscCall(PetscFree(coo->Bjmap1));
6409   PetscCall(PetscFree(coo->Aimap2));
6410   PetscCall(PetscFree(coo->Bimap2));
6411   PetscCall(PetscFree(coo->Aperm2));
6412   PetscCall(PetscFree(coo->Bperm2));
6413   PetscCall(PetscFree(coo->Ajmap2));
6414   PetscCall(PetscFree(coo->Bjmap2));
6415   PetscCall(PetscFree(coo->Cperm1));
6416   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6417   PetscCall(PetscFree(coo));
6418   PetscFunctionReturn(PETSC_SUCCESS);
6419 }
6420 
6421 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6422 {
6423   MPI_Comm             comm;
6424   PetscMPIInt          rank, size;
6425   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6426   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6427   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6428   PetscContainer       container;
6429   MatCOOStruct_MPIAIJ *coo;
6430 
6431   PetscFunctionBegin;
6432   PetscCall(PetscFree(mpiaij->garray));
6433   PetscCall(VecDestroy(&mpiaij->lvec));
6434 #if defined(PETSC_USE_CTABLE)
6435   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6436 #else
6437   PetscCall(PetscFree(mpiaij->colmap));
6438 #endif
6439   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6440   mat->assembled     = PETSC_FALSE;
6441   mat->was_assembled = PETSC_FALSE;
6442 
6443   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6444   PetscCallMPI(MPI_Comm_size(comm, &size));
6445   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6446   PetscCall(PetscLayoutSetUp(mat->rmap));
6447   PetscCall(PetscLayoutSetUp(mat->cmap));
6448   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6449   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6450   PetscCall(MatGetLocalSize(mat, &m, &n));
6451   PetscCall(MatGetSize(mat, &M, &N));
6452 
6453   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6454   /* entries come first, then local rows, then remote rows.                     */
6455   PetscCount n1 = coo_n, *perm1;
6456   PetscInt  *i1 = coo_i, *j1 = coo_j;
6457 
6458   PetscCall(PetscMalloc1(n1, &perm1));
6459   for (k = 0; k < n1; k++) perm1[k] = k;
6460 
6461   /* Manipulate indices so that entries with negative row or col indices will have smallest
6462      row indices, local entries will have greater but negative row indices, and remote entries
6463      will have positive row indices.
6464   */
6465   for (k = 0; k < n1; k++) {
6466     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6467     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6468     else {
6469       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but entries are being inserted into remote rows");
6470       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6471     }
6472   }
6473 
6474   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6475   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6476 
6477   /* Advance k to the first entry we need to take care of */
6478   for (k = 0; k < n1; k++)
6479     if (i1[k] > PETSC_INT_MIN) break;
6480   PetscCount i1start = k;
6481 
6482   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6483   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6484 
6485   /*           Send remote rows to their owner                                  */
6486   /* Find which rows should be sent to which remote ranks*/
6487   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6488   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6489   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6490   const PetscInt *ranges;
6491   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6492 
6493   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6494   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6495   for (k = rem; k < n1;) {
6496     PetscMPIInt owner;
6497     PetscInt    firstRow, lastRow;
6498 
6499     /* Locate a row range */
6500     firstRow = i1[k]; /* first row of this owner */
6501     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6502     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6503 
6504     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6505     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6506 
6507     /* All entries in [k,p) belong to this remote owner */
6508     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6509       PetscMPIInt *sendto2;
6510       PetscInt    *nentries2;
6511       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6512 
6513       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6514       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6515       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6516       PetscCall(PetscFree2(sendto, nentries));
6517       sendto   = sendto2;
6518       nentries = nentries2;
6519       maxNsend = maxNsend2;
6520     }
6521     sendto[nsend] = owner;
6522     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6523     nsend++;
6524     k = p;
6525   }
6526 
6527   /* Build 1st SF to know offsets on remote to send data */
6528   PetscSF      sf1;
6529   PetscInt     nroots = 1, nroots2 = 0;
6530   PetscInt     nleaves = nsend, nleaves2 = 0;
6531   PetscInt    *offsets;
6532   PetscSFNode *iremote;
6533 
6534   PetscCall(PetscSFCreate(comm, &sf1));
6535   PetscCall(PetscMalloc1(nsend, &iremote));
6536   PetscCall(PetscMalloc1(nsend, &offsets));
6537   for (k = 0; k < nsend; k++) {
6538     iremote[k].rank  = sendto[k];
6539     iremote[k].index = 0;
6540     nleaves2 += nentries[k];
6541     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6542   }
6543   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6544   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6545   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 overflowed, the offsets[] check below catches it */
6546   PetscCall(PetscSFDestroy(&sf1));
6547   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6548 
6549   /* Build 2nd SF to send remote COOs to their owner */
6550   PetscSF sf2;
6551   nroots  = nroots2;
6552   nleaves = nleaves2;
6553   PetscCall(PetscSFCreate(comm, &sf2));
6554   PetscCall(PetscSFSetFromOptions(sf2));
6555   PetscCall(PetscMalloc1(nleaves, &iremote));
6556   p = 0;
6557   for (k = 0; k < nsend; k++) {
6558     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6559     for (q = 0; q < nentries[k]; q++, p++) {
6560       iremote[p].rank = sendto[k];
6561       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6562     }
6563   }
6564   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6565 
6566   /* Send the remote COOs to their owner */
6567   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6568   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6569   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6570   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6571   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6572   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6573   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6574   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6575   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6576   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6577   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6578 
6579   PetscCall(PetscFree(offsets));
6580   PetscCall(PetscFree2(sendto, nentries));
6581 
6582   /* Sort received COOs by row along with the permutation array     */
6583   for (k = 0; k < n2; k++) perm2[k] = k;
6584   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6585 
6586   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6587   PetscCount *Cperm1;
6588   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6589   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6590   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6591   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6592 
6593   /* Support for HYPRE matrices, kind of a hack.
6594      Swap min column with diagonal so that diagonal values will go first */
6595   PetscBool hypre;
6596   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6597   if (hypre) {
6598     PetscInt *minj;
6599     PetscBT   hasdiag;
6600 
6601     PetscCall(PetscBTCreate(m, &hasdiag));
6602     PetscCall(PetscMalloc1(m, &minj));
6603     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6604     for (k = i1start; k < rem; k++) {
6605       if (j1[k] < cstart || j1[k] >= cend) continue;
6606       const PetscInt rindex = i1[k] - rstart;
6607       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6608       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6609     }
6610     for (k = 0; k < n2; k++) {
6611       if (j2[k] < cstart || j2[k] >= cend) continue;
6612       const PetscInt rindex = i2[k] - rstart;
6613       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6614       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6615     }
6616     for (k = i1start; k < rem; k++) {
6617       const PetscInt rindex = i1[k] - rstart;
6618       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6619       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6620       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6621     }
6622     for (k = 0; k < n2; k++) {
6623       const PetscInt rindex = i2[k] - rstart;
6624       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6625       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6626       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6627     }
6628     PetscCall(PetscBTDestroy(&hasdiag));
6629     PetscCall(PetscFree(minj));
6630   }
6631 
6632   /* Split local COOs and received COOs into diag/offdiag portions */
6633   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6634   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6635   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6636   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6637   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6638   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6639 
6640   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6641   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6642   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6643   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6644 
6645   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6646   PetscInt *Ai, *Bi;
6647   PetscInt *Aj, *Bj;
6648 
6649   PetscCall(PetscMalloc1(m + 1, &Ai));
6650   PetscCall(PetscMalloc1(m + 1, &Bi));
6651   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6652   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6653 
6654   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6655   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6656   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6657   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6658   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6659 
6660   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6661   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6662 
6663   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6664   /* expect nonzeros in A/B most likely have local contributing entries        */
6665   PetscInt    Annz = Ai[m];
6666   PetscInt    Bnnz = Bi[m];
6667   PetscCount *Ajmap1_new, *Bjmap1_new;
6668 
6669   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6670   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6671 
6672   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6673   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6674 
6675   PetscCall(PetscFree(Aimap1));
6676   PetscCall(PetscFree(Ajmap1));
6677   PetscCall(PetscFree(Bimap1));
6678   PetscCall(PetscFree(Bjmap1));
6679   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6680   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6681   PetscCall(PetscFree(perm1));
6682   PetscCall(PetscFree3(i2, j2, perm2));
6683 
6684   Ajmap1 = Ajmap1_new;
6685   Bjmap1 = Bjmap1_new;
6686 
6687   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6688   if (Annz < Annz1 + Annz2) {
6689     PetscInt *Aj_new;
6690     PetscCall(PetscMalloc1(Annz, &Aj_new));
6691     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6692     PetscCall(PetscFree(Aj));
6693     Aj = Aj_new;
6694   }
6695 
6696   if (Bnnz < Bnnz1 + Bnnz2) {
6697     PetscInt *Bj_new;
6698     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6699     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6700     PetscCall(PetscFree(Bj));
6701     Bj = Bj_new;
6702   }
6703 
6704   /* Create new submatrices for on-process and off-process coupling                  */
6705   PetscScalar     *Aa, *Ba;
6706   MatType          rtype;
6707   Mat_SeqAIJ      *a, *b;
6708   PetscObjectState state;
6709   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6710   PetscCall(PetscCalloc1(Bnnz, &Ba));
6711   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6712   if (cstart) {
6713     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6714   }
6715 
6716   PetscCall(MatGetRootType_Private(mat, &rtype));
6717 
6718   MatSeqXAIJGetOptions_Private(mpiaij->A);
6719   PetscCall(MatDestroy(&mpiaij->A));
6720   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6721   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6722   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6723 
6724   MatSeqXAIJGetOptions_Private(mpiaij->B);
6725   PetscCall(MatDestroy(&mpiaij->B));
6726   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6727   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6728   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6729 
6730   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6731   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6732   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6733   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6734 
6735   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6736   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6737   a->free_a  = PETSC_TRUE;
6738   a->free_ij = PETSC_TRUE;
6739   b->free_a  = PETSC_TRUE;
6740   b->free_ij = PETSC_TRUE;
6741   a->maxnz   = a->nz;
6742   b->maxnz   = b->nz;
6743 
6744   /* conversion must happen AFTER multiply setup */
6745   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6746   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6747   PetscCall(VecDestroy(&mpiaij->lvec));
6748   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6749 
6750   // Put the COO struct in a container and then attach that to the matrix
6751   PetscCall(PetscMalloc1(1, &coo));
6752   coo->n       = coo_n;
6753   coo->sf      = sf2;
6754   coo->sendlen = nleaves;
6755   coo->recvlen = nroots;
6756   coo->Annz    = Annz;
6757   coo->Bnnz    = Bnnz;
6758   coo->Annz2   = Annz2;
6759   coo->Bnnz2   = Bnnz2;
6760   coo->Atot1   = Atot1;
6761   coo->Atot2   = Atot2;
6762   coo->Btot1   = Btot1;
6763   coo->Btot2   = Btot2;
6764   coo->Ajmap1  = Ajmap1;
6765   coo->Aperm1  = Aperm1;
6766   coo->Bjmap1  = Bjmap1;
6767   coo->Bperm1  = Bperm1;
6768   coo->Aimap2  = Aimap2;
6769   coo->Ajmap2  = Ajmap2;
6770   coo->Aperm2  = Aperm2;
6771   coo->Bimap2  = Bimap2;
6772   coo->Bjmap2  = Bjmap2;
6773   coo->Bperm2  = Bperm2;
6774   coo->Cperm1  = Cperm1;
6775   // Allocate in preallocation. If not used, it has zero cost on host
6776   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6777   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6778   PetscCall(PetscContainerSetPointer(container, coo));
6779   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6780   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6781   PetscCall(PetscContainerDestroy(&container));
6782   PetscFunctionReturn(PETSC_SUCCESS);
6783 }
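/*
  For illustration only: the index manipulation in the routine above encodes three entry
  categories in a single sort key. With hypothetical rstart = 4, rend = 8, the row list

    rows: {-1, 9, 5, 2}   (one negative/ignored, one local, two remote)

  becomes {PETSC_INT_MIN, 9, 5 - PETSC_INT_MAX, 2} before sorting, so that after
  PetscSortIntWithIntCountArrayPair() the order is: ignored entries first, then local rows
  (still negative), then remote rows with their true nonnegative indices. Local rows are
  afterwards restored by adding PETSC_INT_MAX back. Entries with negative column indices
  are mapped to PETSC_INT_MIN the same way.
*/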
6784 
6785 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6786 {
6787   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6788   Mat                  A = mpiaij->A, B = mpiaij->B;
6789   PetscScalar         *Aa, *Ba;
6790   PetscScalar         *sendbuf, *recvbuf;
6791   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6792   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6793   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6794   const PetscCount    *Cperm1;
6795   PetscContainer       container;
6796   MatCOOStruct_MPIAIJ *coo;
6797 
6798   PetscFunctionBegin;
6799   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6800   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6801   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6802   sendbuf = coo->sendbuf;
6803   recvbuf = coo->recvbuf;
6804   Ajmap1  = coo->Ajmap1;
6805   Ajmap2  = coo->Ajmap2;
6806   Aimap2  = coo->Aimap2;
6807   Bjmap1  = coo->Bjmap1;
6808   Bjmap2  = coo->Bjmap2;
6809   Bimap2  = coo->Bimap2;
6810   Aperm1  = coo->Aperm1;
6811   Aperm2  = coo->Aperm2;
6812   Bperm1  = coo->Bperm1;
6813   Bperm2  = coo->Bperm2;
6814   Cperm1  = coo->Cperm1;
6815 
6816   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6817   PetscCall(MatSeqAIJGetArray(B, &Ba));
6818 
6819   /* Pack entries to be sent to remote */
6820   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6821 
6822   /* Send remote entries to their owner and overlap the communication with local computation */
6823   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6824   /* Add local entries to A and B */
6825   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6826     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6827     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6828     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6829   }
6830   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6831     PetscScalar sum = 0.0;
6832     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6833     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6834   }
6835   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6836 
6837   /* Add received remote entries to A and B */
6838   for (PetscCount i = 0; i < coo->Annz2; i++) {
6839     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6840   }
6841   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6842     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6843   }
6844   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6845   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6846   PetscFunctionReturn(PETSC_SUCCESS);
6847 }
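/*
  For illustration only: a minimal sketch of the user-facing COO assembly path that the two
  routines above implement (hypothetical sizes and values):

    Mat         A;
    PetscInt    coo_i[] = {0, 0, 1}; // repeated (0,2) entries are summed
    PetscInt    coo_j[] = {2, 2, 1};
    PetscScalar v[]     = {1.0, 2.0, 3.0};

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 4, 4));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatSetPreallocationCOO(A, 3, coo_i, coo_j));
    PetscCall(MatSetValuesCOO(A, v, INSERT_VALUES)); // A(0,2) = 3.0, A(1,1) = 3.0
    PetscCall(MatDestroy(&A));

  Note that MatSetPreallocationCOO() may modify coo_i[]/coo_j[], and that the same
  preallocation can be reused across repeated MatSetValuesCOO() calls with new values.
*/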
6848 
6849 /*MC
6850    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6851 
6852    Options Database Keys:
6853 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6854 
6855    Level: beginner
6856 
6857    Notes:
6858    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6859     in this case the values associated with the rows and columns one passes in are set to zero
6860     in the matrix
6861 
6862     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6863     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6864 
6865 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6866 M*/
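/*
  For illustration only: a typical way to obtain a MATMPIAIJ matrix is through the options
  database rather than hardwiring the type (hypothetical sizes):

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
    PetscCall(MatSetFromOptions(A)); // honors -mat_type; MATAIJ is the default
    PetscCall(MatSetUp(A));

  On more than one MPI process, MATAIJ resolves to MATMPIAIJ and the constructor below
  is invoked.
*/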
6867 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6868 {
6869   Mat_MPIAIJ *b;
6870   PetscMPIInt size;
6871 
6872   PetscFunctionBegin;
6873   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6874 
6875   PetscCall(PetscNew(&b));
6876   B->data       = (void *)b;
6877   B->ops[0]     = MatOps_Values;
6878   B->assembled  = PETSC_FALSE;
6879   B->insertmode = NOT_SET_VALUES;
6880   b->size       = size;
6881 
6882   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6883 
6884   /* build cache for off array entries formed */
6885   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6886 
6887   b->donotstash  = PETSC_FALSE;
6888   b->colmap      = NULL;
6889   b->garray      = NULL;
6890   b->roworiented = PETSC_TRUE;
6891 
6892   /* stuff used for matrix vector multiply */
6893   b->lvec  = NULL;
6894   b->Mvctx = NULL;
6895 
6896   /* stuff for MatGetRow() */
6897   b->rowindices   = NULL;
6898   b->rowvalues    = NULL;
6899   b->getrowactive = PETSC_FALSE;
6900 
6901   /* flexible pointer used in CUSPARSE classes */
6902   b->spptr = NULL;
6903 
6904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6914 #if defined(PETSC_HAVE_CUDA)
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6916 #endif
6917 #if defined(PETSC_HAVE_HIP)
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6919 #endif
6920 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6922 #endif
6923 #if defined(PETSC_HAVE_MKL_SPARSE)
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6925 #endif
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6930 #if defined(PETSC_HAVE_ELEMENTAL)
6931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6932 #endif
6933 #if defined(PETSC_HAVE_SCALAPACK)
6934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6935 #endif
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6938 #if defined(PETSC_HAVE_HYPRE)
6939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6941 #endif
6942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6943   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6944   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6946   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6947   PetscFunctionReturn(PETSC_SUCCESS);
6948 }
6949 
6950 /*@
6951   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6952   and "off-diagonal" part of the matrix in CSR format.
6953 
6954   Collective
6955 
6956   Input Parameters:
6957 + comm - MPI communicator
6958 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6959 . n    - This value should be the same as the local size used in creating the
6960          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6961          calculated if `N` is given). For square matrices `n` is almost always `m`.
6962 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6963 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6964 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6965 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6966 . a    - matrix values
6967 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6968 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6969 - oa   - matrix values
6970 
6971   Output Parameter:
6972 . mat - the matrix
6973 
6974   Level: advanced
6975 
6976   Notes:
6977   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6978   must free the arrays once the matrix has been destroyed and not before.
6979 
6980   The `i` and `j` indices are 0 based
6981 
6982   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6983 
6984   This sets local rows and cannot be used to set off-processor values.
6985 
6986   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6987   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6988   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6989   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6990   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6991   communication if it is known that only local entries will be set.
6992 
6993 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6994           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6995 @*/
6996 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6997 {
6998   Mat_MPIAIJ *maij;
6999 
7000   PetscFunctionBegin;
7001   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
7002   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
7003   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
7004   PetscCall(MatCreate(comm, mat));
7005   PetscCall(MatSetSizes(*mat, m, n, M, N));
7006   PetscCall(MatSetType(*mat, MATMPIAIJ));
7007   maij = (Mat_MPIAIJ *)(*mat)->data;
7008 
7009   (*mat)->preallocated = PETSC_TRUE;
7010 
7011   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7012   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7013 
7014   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7015   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7016 
7017   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7018   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7019   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7020   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7021   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7022   PetscFunctionReturn(PETSC_SUCCESS);
7023 }
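/*
  For illustration only: a minimal sketch of calling MatCreateMPIAIJWithSplitArrays() on
  2 ranks, each owning 2 rows and 2 columns of a 4x4 matrix (hypothetical data for rank 0
  shown; the arrays must outlive the matrix since they are not copied):

    // rank 0: diagonal block holds (0,0) and (1,1); off-diagonal block holds (0,2)
    PetscInt    i[]  = {0, 1, 2},  j[]  = {0, 1}; // local column indices
    PetscScalar a[]  = {1.0, 2.0};
    PetscInt    oi[] = {0, 1, 1},  oj[] = {2};    // global column indices
    PetscScalar oa[] = {5.0};
    Mat         A;

    PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A));
*/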
7024 
7025 typedef struct {
7026   Mat       *mp;    /* intermediate products */
7027   PetscBool *mptmp; /* is the intermediate product temporary? */
7028   PetscInt   cp;    /* number of intermediate products */
7029 
7030   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7031   PetscInt    *startsj_s, *startsj_r;
7032   PetscScalar *bufa;
7033   Mat          P_oth;
7034 
7035   /* may take advantage of merging product->B */
7036   Mat Bloc; /* B-local by merging diag and off-diag */
7037 
7038   /* cusparse does not support splitting between the symbolic and numeric phases.
7039      When api_user is true, we don't need to update the numerical values
7040      of the temporary storage */
7041   PetscBool reusesym;
7042 
7043   /* support for COO values insertion */
7044   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7045   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7046   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7047   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7048   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7049   PetscMemType mtype;
7050 
7051   /* customization */
7052   PetscBool abmerge;
7053   PetscBool P_oth_bind;
7054 } MatMatMPIAIJBACKEND;
7055 
7056 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7057 {
7058   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7059   PetscInt             i;
7060 
7061   PetscFunctionBegin;
7062   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7063   PetscCall(PetscFree(mmdata->bufa));
7064   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7065   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7066   PetscCall(MatDestroy(&mmdata->P_oth));
7067   PetscCall(MatDestroy(&mmdata->Bloc));
7068   PetscCall(PetscSFDestroy(&mmdata->sf));
7069   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7070   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7071   PetscCall(PetscFree(mmdata->own[0]));
7072   PetscCall(PetscFree(mmdata->own));
7073   PetscCall(PetscFree(mmdata->off[0]));
7074   PetscCall(PetscFree(mmdata->off));
7075   PetscCall(PetscFree(mmdata));
7076   PetscFunctionReturn(PETSC_SUCCESS);
7077 }
7078 
7079 /* Copy selected n entries with indices in idx[] of A to v[].
7080    If idx is NULL, copy the whole data array of A to v[]
7081  */
7082 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7083 {
7084   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7085 
7086   PetscFunctionBegin;
7087   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7088   if (f) {
7089     PetscCall((*f)(A, n, idx, v));
7090   } else {
7091     const PetscScalar *vv;
7092 
7093     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7094     if (n && idx) {
7095       PetscScalar    *w  = v;
7096       const PetscInt *oi = idx;
7097       PetscInt        j;
7098 
7099       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7100     } else {
7101       PetscCall(PetscArraycpy(v, vv, n));
7102     }
7103     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7104   }
7105   PetscFunctionReturn(PETSC_SUCCESS);
7106 }
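/*
  For illustration only: with a hypothetical SeqAIJ matrix A whose value array is
  {10, 20, 30, 40} and idx[] = {3, 1}, MatSeqAIJCopySubArray(A, 2, idx, v) yields
  v[] = {40, 20}; with idx == NULL it copies the first n values verbatim.
*/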
7107 
7108 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7109 {
7110   MatMatMPIAIJBACKEND *mmdata;
7111   PetscInt             i, n_d, n_o;
7112 
7113   PetscFunctionBegin;
7114   MatCheckProduct(C, 1);
7115   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7116   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7117   if (!mmdata->reusesym) { /* update temporary matrices */
7118     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7119     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7120   }
7121   mmdata->reusesym = PETSC_FALSE;
7122 
7123   for (i = 0; i < mmdata->cp; i++) {
7124     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7125     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7126   }
7127   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7128     PetscInt noff;
7129 
7130     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7131     if (mmdata->mptmp[i]) continue;
7132     if (noff) {
7133       PetscInt nown;
7134 
7135       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7136       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7137       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7138       n_o += noff;
7139       n_d += nown;
7140     } else {
7141       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7142 
7143       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7144       n_d += mm->nz;
7145     }
7146   }
7147   if (mmdata->hasoffproc) { /* offprocess insertion */
7148     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7149     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7150   }
7151   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7152   PetscFunctionReturn(PETSC_SUCCESS);
7153 }
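/*
  For illustration only: the backend above plugs into the generic MatProduct workflow,
  which a user typically drives as (hypothetical matrices A and P):

    Mat C;
    PetscCall(MatProductCreate(A, P, NULL, &C));
    PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
    PetscCall(MatProductSetFromOptions(C));
    PetscCall(MatProductSymbolic(C)); // builds the intermediate products and COO maps
    PetscCall(MatProductNumeric(C));  // lands in MatProductNumeric_MPIAIJBACKEND when this backend is selected
    PetscCall(MatDestroy(&C));

  Repeated calls to MatProductNumeric() reuse the symbolic data stored in C->product->data.
*/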
7154 
7155 /* Support for Pt * A, A * P, or Pt * A * P */
7156 #define MAX_NUMBER_INTERMEDIATE 4
7157 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7158 {
7159   Mat_Product           *product = C->product;
7160   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7161   Mat_MPIAIJ            *a, *p;
7162   MatMatMPIAIJBACKEND   *mmdata;
7163   ISLocalToGlobalMapping P_oth_l2g = NULL;
7164   IS                     glob      = NULL;
7165   const char            *prefix;
7166   char                   pprefix[256];
7167   const PetscInt        *globidx, *P_oth_idx;
7168   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7169   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7170   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7171                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7172                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7173   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7174 
7175   MatProductType ptype;
7176   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7177   PetscMPIInt    size;
7178 
7179   PetscFunctionBegin;
7180   MatCheckProduct(C, 1);
7181   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7182   ptype = product->type;
7183   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7184     ptype                                          = MATPRODUCT_AB;
7185     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7186   }
7187   switch (ptype) {
7188   case MATPRODUCT_AB:
7189     A          = product->A;
7190     P          = product->B;
7191     m          = A->rmap->n;
7192     n          = P->cmap->n;
7193     M          = A->rmap->N;
7194     N          = P->cmap->N;
7195     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7196     break;
7197   case MATPRODUCT_AtB:
7198     P          = product->A;
7199     A          = product->B;
7200     m          = P->cmap->n;
7201     n          = A->cmap->n;
7202     M          = P->cmap->N;
7203     N          = A->cmap->N;
7204     hasoffproc = PETSC_TRUE;
7205     break;
7206   case MATPRODUCT_PtAP:
7207     A          = product->A;
7208     P          = product->B;
7209     m          = P->cmap->n;
7210     n          = P->cmap->n;
7211     M          = P->cmap->N;
7212     N          = P->cmap->N;
7213     hasoffproc = PETSC_TRUE;
7214     break;
7215   default:
7216     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7217   }
7218   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7219   if (size == 1) hasoffproc = PETSC_FALSE;
7220 
7221   /* defaults */
7222   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7223     mp[i]    = NULL;
7224     mptmp[i] = PETSC_FALSE;
7225     rmapt[i] = -1;
7226     cmapt[i] = -1;
7227     rmapa[i] = NULL;
7228     cmapa[i] = NULL;
7229   }
7230 
7231   /* customization */
7232   PetscCall(PetscNew(&mmdata));
7233   mmdata->reusesym = product->api_user;
7234   if (ptype == MATPRODUCT_AB) {
7235     if (product->api_user) {
7236       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7237       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7238       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7239       PetscOptionsEnd();
7240     } else {
7241       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7242       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7243       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7244       PetscOptionsEnd();
7245     }
7246   } else if (ptype == MATPRODUCT_PtAP) {
7247     if (product->api_user) {
7248       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7249       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7250       PetscOptionsEnd();
7251     } else {
7252       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7253       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7254       PetscOptionsEnd();
7255     }
7256   }
7257   a = (Mat_MPIAIJ *)A->data;
7258   p = (Mat_MPIAIJ *)P->data;
7259   PetscCall(MatSetSizes(C, m, n, M, N));
7260   PetscCall(PetscLayoutSetUp(C->rmap));
7261   PetscCall(PetscLayoutSetUp(C->cmap));
7262   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7263   PetscCall(MatGetOptionsPrefix(C, &prefix));
7264 
7265   cp = 0;
7266   switch (ptype) {
7267   case MATPRODUCT_AB: /* A * P */
7268     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7269 
7270     /* A_diag * P_local (merged or not) */
7271     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7272       /* P is product->B */
7273       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7274       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7275       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7276       PetscCall(MatProductSetFill(mp[cp], product->fill));
7277       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7278       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7279       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7280       mp[cp]->product->api_user = product->api_user;
7281       PetscCall(MatProductSetFromOptions(mp[cp]));
7282       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7283       PetscCall(ISGetIndices(glob, &globidx));
7284       rmapt[cp] = 1;
7285       cmapt[cp] = 2;
7286       cmapa[cp] = globidx;
7287       mptmp[cp] = PETSC_FALSE;
7288       cp++;
7289     } else { /* A_diag * P_diag and A_diag * P_off */
7290       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7291       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7292       PetscCall(MatProductSetFill(mp[cp], product->fill));
7293       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7294       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7295       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7296       mp[cp]->product->api_user = product->api_user;
7297       PetscCall(MatProductSetFromOptions(mp[cp]));
7298       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7299       rmapt[cp] = 1;
7300       cmapt[cp] = 1;
7301       mptmp[cp] = PETSC_FALSE;
7302       cp++;
7303       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7304       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7305       PetscCall(MatProductSetFill(mp[cp], product->fill));
7306       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7307       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7308       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7309       mp[cp]->product->api_user = product->api_user;
7310       PetscCall(MatProductSetFromOptions(mp[cp]));
7311       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7312       rmapt[cp] = 1;
7313       cmapt[cp] = 2;
7314       cmapa[cp] = p->garray;
7315       mptmp[cp] = PETSC_FALSE;
7316       cp++;
7317     }
7318 
7319     /* A_off * P_other */
7320     if (mmdata->P_oth) {
7321       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7322       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7323       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7324       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7325       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7326       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7327       PetscCall(MatProductSetFill(mp[cp], product->fill));
7328       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7329       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7330       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7331       mp[cp]->product->api_user = product->api_user;
7332       PetscCall(MatProductSetFromOptions(mp[cp]));
7333       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7334       rmapt[cp] = 1;
7335       cmapt[cp] = 2;
7336       cmapa[cp] = P_oth_idx;
7337       mptmp[cp] = PETSC_FALSE;
7338       cp++;
7339     }
7340     break;
7341 
7342   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7343     /* A is product->B */
7344     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7345     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7346       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7347       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7348       PetscCall(MatProductSetFill(mp[cp], product->fill));
7349       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7350       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7351       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7352       mp[cp]->product->api_user = product->api_user;
7353       PetscCall(MatProductSetFromOptions(mp[cp]));
7354       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7355       PetscCall(ISGetIndices(glob, &globidx));
7356       rmapt[cp] = 2;
7357       rmapa[cp] = globidx;
7358       cmapt[cp] = 2;
7359       cmapa[cp] = globidx;
7360       mptmp[cp] = PETSC_FALSE;
7361       cp++;
7362     } else {
7363       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7364       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7365       PetscCall(MatProductSetFill(mp[cp], product->fill));
7366       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7367       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7368       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7369       mp[cp]->product->api_user = product->api_user;
7370       PetscCall(MatProductSetFromOptions(mp[cp]));
7371       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7372       PetscCall(ISGetIndices(glob, &globidx));
7373       rmapt[cp] = 1;
7374       cmapt[cp] = 2;
7375       cmapa[cp] = globidx;
7376       mptmp[cp] = PETSC_FALSE;
7377       cp++;
7378       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7379       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7380       PetscCall(MatProductSetFill(mp[cp], product->fill));
7381       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7382       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7383       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7384       mp[cp]->product->api_user = product->api_user;
7385       PetscCall(MatProductSetFromOptions(mp[cp]));
7386       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7387       rmapt[cp] = 2;
7388       rmapa[cp] = p->garray;
7389       cmapt[cp] = 2;
7390       cmapa[cp] = globidx;
7391       mptmp[cp] = PETSC_FALSE;
7392       cp++;
7393     }
7394     break;
7395   case MATPRODUCT_PtAP:
7396     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7397     /* P is product->B */
7398     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7399     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7400     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7401     PetscCall(MatProductSetFill(mp[cp], product->fill));
7402     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7403     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7404     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7405     mp[cp]->product->api_user = product->api_user;
7406     PetscCall(MatProductSetFromOptions(mp[cp]));
7407     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7408     PetscCall(ISGetIndices(glob, &globidx));
7409     rmapt[cp] = 2;
7410     rmapa[cp] = globidx;
7411     cmapt[cp] = 2;
7412     cmapa[cp] = globidx;
7413     mptmp[cp] = PETSC_FALSE;
7414     cp++;
7415     if (mmdata->P_oth) {
7416       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7417       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7418       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7419       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7420       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7421       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7422       PetscCall(MatProductSetFill(mp[cp], product->fill));
7423       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7424       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7425       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7426       mp[cp]->product->api_user = product->api_user;
7427       PetscCall(MatProductSetFromOptions(mp[cp]));
7428       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7429       mptmp[cp] = PETSC_TRUE;
7430       cp++;
7431       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7432       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7433       PetscCall(MatProductSetFill(mp[cp], product->fill));
7434       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7435       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7436       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7437       mp[cp]->product->api_user = product->api_user;
7438       PetscCall(MatProductSetFromOptions(mp[cp]));
7439       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7440       rmapt[cp] = 2;
7441       rmapa[cp] = globidx;
7442       cmapt[cp] = 2;
7443       cmapa[cp] = P_oth_idx;
7444       mptmp[cp] = PETSC_FALSE;
7445       cp++;
7446     }
7447     break;
7448   default:
7449     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7450   }
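  /* Note (recap of the conventions used in the COO assembly below): rmapt[]/cmapt[] record, for each
     intermediate product mp[p], how its local row/column indices translate to global indices of C:
       0 - indices are already global
       1 - consecutive: add the owned range start (rstart/cstart) of C
       2 - sparse: indirect through the rmapa[]/cmapa[] index arrays */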
7451   /* sanity check */
7452   if (size > 1)
7453     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7454 
7455   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7456   for (i = 0; i < cp; i++) {
7457     mmdata->mp[i]    = mp[i];
7458     mmdata->mptmp[i] = mptmp[i];
7459   }
7460   mmdata->cp             = cp;
7461   C->product->data       = mmdata;
7462   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7463   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7464 
7465   /* memory type */
7466   mmdata->mtype = PETSC_MEMTYPE_HOST;
7467   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7468   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7469   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7470   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7471   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7472   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7473 
7474   /* prepare COO coordinates for value insertion */
7475 
7476   /* count total nonzeros of those intermediate seqaij Mats
7477     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7478     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be sent to remote procs
7479     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7480   */
7481   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7482     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7483     if (mptmp[cp]) continue;
7484     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7485       const PetscInt *rmap = rmapa[cp];
7486       const PetscInt  mr   = mp[cp]->rmap->n;
7487       const PetscInt  rs   = C->rmap->rstart;
7488       const PetscInt  re   = C->rmap->rend;
7489       const PetscInt *ii   = mm->i;
7490       for (i = 0; i < mr; i++) {
7491         const PetscInt gr = rmap[i];
7492         const PetscInt nz = ii[i + 1] - ii[i];
7493         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7494         else ncoo_oown += nz;                  /* this row is local */
7495       }
7496     } else ncoo_d += mm->nz;
7497   }
7498 
7499   /*
7500     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7501 
7502     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this proc by remote procs.
7503 
7504     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7505 
7506     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on remote procs
7507     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7508     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to remote procs.
7509 
7510     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7511     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this proc will receive.
7512   */
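  /* Illustrative sketch (hypothetical sizes): with cp = 2 non-temporary products, where mp[0]
     has 3 offproc and 5 local nonzeros and mp[1] has 0 offproc and 4 local nonzeros:
       off[0] -> |x x x|          off[1] = off[0] + 3, off[2] = off[1] + 0
       own[0] -> |x x x x x|      own[1] = own[0] + 5, own[2] = own[1] + 4
     Each x stores a position into the aij value array of the corresponding mp[p], so the numeric
     phase can copy values straight into the send buffer or into coo_v. */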
7513   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7514   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7515 
7516   /* gather (i,j) of nonzeros inserted by remote procs */
7517   if (hasoffproc) {
7518     PetscSF  msf;
7519     PetscInt ncoo2, *coo_i2, *coo_j2;
7520 
7521     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7522     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7523     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7524 
7525     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7526       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7527       PetscInt   *idxoff = mmdata->off[cp];
7528       PetscInt   *idxown = mmdata->own[cp];
7529       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7530         const PetscInt *rmap = rmapa[cp];
7531         const PetscInt *cmap = cmapa[cp];
7532         const PetscInt *ii   = mm->i;
7533         PetscInt       *coi  = coo_i + ncoo_o;
7534         PetscInt       *coj  = coo_j + ncoo_o;
7535         const PetscInt  mr   = mp[cp]->rmap->n;
7536         const PetscInt  rs   = C->rmap->rstart;
7537         const PetscInt  re   = C->rmap->rend;
7538         const PetscInt  cs   = C->cmap->rstart;
7539         for (i = 0; i < mr; i++) {
7540           const PetscInt *jj = mm->j + ii[i];
7541           const PetscInt  gr = rmap[i];
7542           const PetscInt  nz = ii[i + 1] - ii[i];
7543           if (gr < rs || gr >= re) { /* this is an offproc row */
7544             for (j = ii[i]; j < ii[i + 1]; j++) {
7545               *coi++    = gr;
7546               *idxoff++ = j;
7547             }
7548             if (!cmapt[cp]) { /* type-0, already global */
7549               for (j = 0; j < nz; j++) *coj++ = jj[j];
7550             } else if (cmapt[cp] == 1) { /* type-1, local to global for owned columns of C */
7551               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7552             } else { /* type-2, local to global for sparse columns */
7553               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7554             }
7555             ncoo_o += nz;
7556           } else { /* this is a local row */
7557             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7558           }
7559         }
7560       }
7561       mmdata->off[cp + 1] = idxoff;
7562       mmdata->own[cp + 1] = idxown;
7563     }
7564 
7565     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7566     PetscInt incoo_o;
7567     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7568     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7569     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7570     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7571     ncoo = ncoo_d + ncoo_oown + ncoo2;
7572     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7573     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7574     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7575     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7576     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7577     PetscCall(PetscFree2(coo_i, coo_j));
7578     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7579     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7580     coo_i = coo_i2;
7581     coo_j = coo_j2;
7582   } else { /* no offproc value insertion */
7583     ncoo = ncoo_d;
7584     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7585 
7586     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7587     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7588     PetscCall(PetscSFSetUp(mmdata->sf));
7589   }
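  /* Recap of the offproc path above: the SF graph makes each offproc entry a leaf attached to the
     root (owning) process of its global row in C->rmap, so PetscSFGatherBegin/End concatenate the
     (i,j) pairs of remote nonzeros after the ncoo_d + ncoo_oown locally generated ones. */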
7590   mmdata->hasoffproc = hasoffproc;
7591 
7592   /* gather (i,j) of nonzeros inserted locally */
7593   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7594     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7595     PetscInt       *coi  = coo_i + ncoo_d;
7596     PetscInt       *coj  = coo_j + ncoo_d;
7597     const PetscInt *jj   = mm->j;
7598     const PetscInt *ii   = mm->i;
7599     const PetscInt *cmap = cmapa[cp];
7600     const PetscInt *rmap = rmapa[cp];
7601     const PetscInt  mr   = mp[cp]->rmap->n;
7602     const PetscInt  rs   = C->rmap->rstart;
7603     const PetscInt  re   = C->rmap->rend;
7604     const PetscInt  cs   = C->cmap->rstart;
7605 
7606     if (mptmp[cp]) continue;
7607     if (rmapt[cp] == 1) { /* consecutive rows */
7608       /* fill coo_i */
7609       for (i = 0; i < mr; i++) {
7610         const PetscInt gr = i + rs;
7611         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7612       }
7613       /* fill coo_j */
7614       if (!cmapt[cp]) { /* type-0, already global */
7615         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7616       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7617         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7618       } else {                                            /* type-2, local to global for sparse columns */
7619         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7620       }
7621       ncoo_d += mm->nz;
7622     } else if (rmapt[cp] == 2) { /* sparse rows */
7623       for (i = 0; i < mr; i++) {
7624         const PetscInt *jj = mm->j + ii[i];
7625         const PetscInt  gr = rmap[i];
7626         const PetscInt  nz = ii[i + 1] - ii[i];
7627         if (gr >= rs && gr < re) { /* local rows */
7628           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7629           if (!cmapt[cp]) { /* type-0, already global */
7630             for (j = 0; j < nz; j++) *coj++ = jj[j];
7631           } else if (cmapt[cp] == 1) { /* type-1, local to global for owned columns of C */
7632             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7633           } else { /* type-2, local to global for sparse columns */
7634             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7635           }
7636           ncoo_d += nz;
7637         }
7638       }
7639     }
7640   }
7641   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7642   PetscCall(ISDestroy(&glob));
7643   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7644   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7645   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7646   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7647 
7648   /* preallocate with COO data */
7649   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7650   PetscCall(PetscFree2(coo_i, coo_j));
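  /* The numeric phase (MatProductNumeric_MPIAIJBACKEND, installed above) is expected to fill
     mmdata->coo_v in this same ordering (local entries first, gathered remote entries last) and
     hand it to MatSetValuesCOO() */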
7651   PetscFunctionReturn(PETSC_SUCCESS);
7652 }
7653 
7654 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7655 {
7656   Mat_Product *product = mat->product;
7657 #if defined(PETSC_HAVE_DEVICE)
7658   PetscBool match  = PETSC_FALSE;
7659   PetscBool usecpu = PETSC_FALSE;
7660 #else
7661   PetscBool match = PETSC_TRUE;
7662 #endif
7663 
7664   PetscFunctionBegin;
7665   MatCheckProduct(mat, 1);
7666 #if defined(PETSC_HAVE_DEVICE)
7667   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7668   if (match) { /* we can always fall back to the CPU if requested */
7669     switch (product->type) {
7670     case MATPRODUCT_AB:
7671       if (product->api_user) {
7672         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7673         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7674         PetscOptionsEnd();
7675       } else {
7676         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7677         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7678         PetscOptionsEnd();
7679       }
7680       break;
7681     case MATPRODUCT_AtB:
7682       if (product->api_user) {
7683         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7684         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7685         PetscOptionsEnd();
7686       } else {
7687         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7688         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7689         PetscOptionsEnd();
7690       }
7691       break;
7692     case MATPRODUCT_PtAP:
7693       if (product->api_user) {
7694         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7695         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7696         PetscOptionsEnd();
7697       } else {
7698         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7699         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7700         PetscOptionsEnd();
7701       }
7702       break;
7703     default:
7704       break;
7705     }
7706     match = (PetscBool)!usecpu;
7707   }
7708 #endif
7709   if (match) {
7710     switch (product->type) {
7711     case MATPRODUCT_AB:
7712     case MATPRODUCT_AtB:
7713     case MATPRODUCT_PtAP:
7714       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7715       break;
7716     default:
7717       break;
7718     }
7719   }
7720   /* fall back to MPIAIJ ops */
7721   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7722   PetscFunctionReturn(PETSC_SUCCESS);
7723 }
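/* Example (assumed typical command-line usage; "./app" is a placeholder): the options declared
   above let a run fall back to the CPU implementation for a given product, e.g.

     ./app -matmatmult_backend_cpu                # MatMatMult() API users
     ./app -mat_product_algorithm_backend_cpu     # MatProductCreate() API users

   matching options exist for MatTransposeMatMult (-mattransposematmult_backend_cpu) and
   MatPtAP (-matptap_backend_cpu). */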
7724 
7725 /*
7726    Produces the set of block column indices present in a matrix row, one for each block represented in the original row (assumes MatGetRow() returns the column indices in ascending order)
7727 
7728    n - the number of block indices returned in cc[]
7729    cc - the block indices (must be large enough to contain the indices)
7730 */
7731 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7732 {
7733   PetscInt        cnt = -1, nidx, j;
7734   const PetscInt *idx;
7735 
7736   PetscFunctionBegin;
7737   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7738   if (nidx) {
7739     cnt     = 0;
7740     cc[cnt] = idx[0] / bs;
7741     for (j = 1; j < nidx; j++) {
7742       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7743     }
7744   }
7745   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7746   *n = cnt + 1;
7747   PetscFunctionReturn(PETSC_SUCCESS);
7748 }
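/* Worked example (hypothetical row): for bs = 2 and a row with scalar column indices
   {0, 1, 4, 5, 6}, idx[j]/bs yields {0, 0, 2, 2, 3}, so cc = {0, 2, 3} and *n = 3.
   This relies on the ascending ordering of the indices returned by MatGetRow(). */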
7749 
7750 /*
7751     Produces the set of block column indices of a matrix block row (scalar rows start .. start+bs-1), one for each block represented in the original set of rows
7752 
7753     ncollapsed - the number of block indices
7754     collapsed - the block indices (an alias of the w0/w2 workspace; must be large enough to contain the indices)
7755 */
7756 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7757 {
7758   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7759 
7760   PetscFunctionBegin;
7761   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7762   for (i = start + 1; i < start + bs; i++) {
7763     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7764     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7765     cprevtmp = cprev;
7766     cprev    = merged;
7767     merged   = cprevtmp;
7768   }
7769   *ncollapsed = nprev;
7770   if (collapsed) *collapsed = cprev;
7771   PetscFunctionReturn(PETSC_SUCCESS);
7772 }
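/* Usage sketch (hypothetical names; max_row_nnz is an assumed known bound on row nonzeros):
   collapse block row 0 of a bs = 3 matrix A. Following the caller below, each workspace array is
   sized to hold the union of bs collapsed rows:

     PetscInt n, *bcols, *w0, *w1, *w2, wlen = 3 * max_row_nnz;
     PetscCall(PetscMalloc3(wlen, &w0, wlen, &w1, wlen, &w2));
     PetscCall(MatCollapseRows(A, 0, 3, w0, w1, w2, &n, &bcols));
     // bcols aliases w0 or w2; consume it before PetscFree3(w0, w1, w2)

   Each MatCollapseRow() result is merged (PetscMergeIntArray) into the running union, so the
   final list covers all bs scalar rows of the block row. */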
7773 
7774 /*
7775  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7776 
7777  Input Parameters:
7778 + Amat - matrix
7779 . symmetrize - make the result symmetric
7780 . scale - scale with diagonal
7781 . filter - filter out entries smaller than this value in absolute value (ignored if negative)
7782 . index_size - size of index[] (0 means use all rows/columns of each block)
7783 - index - if index_size > 0, the within-block row/column indices used to compute the graph values
7784 
7785  Output Parameter:
7786 . a_Gmat - output scalar graph with entries >= 0
7787 */
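/* Example usage (hypothetical sketch, as a GAMG-style coarsening driver might do): build a
   symmetrized, diagonally scaled scalar graph from a block matrix A and drop entries below 1e-4:

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 1e-4, 0, NULL, &G));
     ... coarsen using G ...
     PetscCall(MatDestroy(&G));
*/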
7786 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7787 {
7788   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7789   MPI_Comm  comm;
7790   Mat       Gmat;
7791   PetscBool ismpiaij, isseqaij;
7792   Mat       a, b, c;
7793   MatType   jtype;
7794 
7795   PetscFunctionBegin;
7796   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7797   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7798   PetscCall(MatGetSize(Amat, &MM, &NN));
7799   PetscCall(MatGetBlockSize(Amat, &bs));
7800   nloc = (Iend - Istart) / bs;
7801 
7802   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7803   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7804   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7805 
7806   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7807   /* One solution would be to provide a new API, MatAIJGetCollapsedAIJ, so that each class can supply a fast
7808      implementation */
7809   if (bs > 1) {
7810     PetscCall(MatGetType(Amat, &jtype));
7811     PetscCall(MatCreate(comm, &Gmat));
7812     PetscCall(MatSetType(Gmat, jtype));
7813     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7814     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7815     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7816       PetscInt  *d_nnz, *o_nnz;
7817       MatScalar *aa, val, *AA;
7818       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7819 
7820       if (isseqaij) {
7821         a = Amat;
7822         b = NULL;
7823       } else {
7824         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7825         a             = d->A;
7826         b             = d->B;
7827       }
7828       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7829       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7830       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7831         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7832         const PetscInt *cols1, *cols2;
7833 
7834         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7835           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7836           nnz[brow / bs] = nc2 / bs;
7837           if (nc2 % bs) ok = 0;
7838           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7839           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7840             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7841             if (nc1 != nc2) ok = 0;
7842             else {
7843               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7844                 if (cols1[jj] != cols2[jj]) ok = 0;
7845                 if (cols1[jj] % bs != jj % bs) ok = 0;
7846               }
7847             }
7848             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7849           }
7850           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7851           if (!ok) {
7852             PetscCall(PetscFree2(d_nnz, o_nnz));
7853             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7854             goto old_bs;
7855           }
7856         }
7857       }
7858       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7859       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7860       PetscCall(PetscFree2(d_nnz, o_nnz));
7861       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7862       // diag
7863       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7864         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7865 
7866         ai = aseq->i;
7867         n  = ai[brow + 1] - ai[brow];
7868         aj = aseq->j + ai[brow];
7869         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7870           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7871           val        = 0;
7872           if (index_size == 0) {
7873             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7874               aa = aseq->a + ai[brow + ii] + k;
7875               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7876                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7877               }
7878             }
7879           } else {                                            // use (index,index) value if provided
7880             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7881               PetscInt ii = index[iii];
7882               aa          = aseq->a + ai[brow + ii] + k;
7883               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7884                 PetscInt jj = index[jjj];
7885                 val += PetscAbs(PetscRealPart(aa[jj]));
7886               }
7887             }
7888           }
7889           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7890           AA[k / bs] = val;
7891         }
7892         grow = Istart / bs + brow / bs;
7893         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7894       }
7895       // off-diag
7896       if (ismpiaij) {
7897         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7898         const PetscScalar *vals;
7899         const PetscInt    *cols, *garray = aij->garray;
7900 
7901         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray?");
7902         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7903           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7904           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7905             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7906             AA[k / bs] = 0;
7907             AJ[cidx]   = garray[cols[k]] / bs;
7908           }
7909           nc = ncols / bs;
7910           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7911           if (index_size == 0) {
7912             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7913               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7914               for (PetscInt k = 0; k < ncols; k += bs) {
7915                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7916                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7917                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7918                 }
7919               }
7920               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7921             }
7922           } else {                                            // use (index,index) value if provided
7923             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7924               PetscInt ii = index[iii];
7925               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7926               for (PetscInt k = 0; k < ncols; k += bs) {
7927                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7928                   PetscInt jj = index[jjj];
7929                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7930                 }
7931               }
7932               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7933             }
7934           }
7935           grow = Istart / bs + brow / bs;
7936           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7937         }
7938       }
7939       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7940       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7941       PetscCall(PetscFree2(AA, AJ));
7942     } else {
7943       const PetscScalar *vals;
7944       const PetscInt    *idx;
7945       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7946     old_bs:
7947       /*
7948        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7949        */
7950       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7951       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7952       if (isseqaij) {
7953         PetscInt max_d_nnz;
7954 
7955         /*
7956          Determine exact preallocation count for (sequential) scalar matrix
7957          */
7958         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7959         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7960         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7961         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7962         PetscCall(PetscFree3(w0, w1, w2));
7963       } else if (ismpiaij) {
7964         Mat             Daij, Oaij;
7965         const PetscInt *garray;
7966         PetscInt        max_d_nnz;
7967 
7968         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7969         /*
7970          Determine exact preallocation count for diagonal block portion of scalar matrix
7971          */
7972         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7973         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7974         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7975         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7976         PetscCall(PetscFree3(w0, w1, w2));
7977         /*
7978          Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
7979          */
7980         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7981           o_nnz[jj] = 0;
7982           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7983             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7984             o_nnz[jj] += ncols;
7985             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7986           }
7987           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7988         }
7989       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7990       /* get scalar copy (norms) of matrix */
7991       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7992       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7993       PetscCall(PetscFree2(d_nnz, o_nnz));
7994       for (Ii = Istart; Ii < Iend; Ii++) {
7995         PetscInt dest_row = Ii / bs;
7996 
7997         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7998         for (jj = 0; jj < ncols; jj++) {
7999           PetscInt    dest_col = idx[jj] / bs;
8000           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8001 
8002           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8003         }
8004         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8005       }
8006       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8007       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8008     }
8009   } else {
8010     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8011     else {
8012       Gmat = Amat;
8013       PetscCall(PetscObjectReference((PetscObject)Gmat));
8014     }
8015     if (isseqaij) {
8016       a = Gmat;
8017       b = NULL;
8018     } else {
8019       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8020       a             = d->A;
8021       b             = d->B;
8022     }
8023     if (filter >= 0 || scale) {
8024       /* take absolute value of each entry */
8025       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8026         MatInfo      info;
8027         PetscScalar *avals;
8028 
8029         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8030         PetscCall(MatSeqAIJGetArray(c, &avals));
8031         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8032         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8033       }
8034     }
8035   }
8036   if (symmetrize) {
8037     PetscBool isset, issym;
8038 
8039     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8040     if (!isset || !issym) {
8041       Mat matTrans;
8042 
8043       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8044       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8045       PetscCall(MatDestroy(&matTrans));
8046     }
8047     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8048   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8049   if (scale) {
8050     /* scale Gmat so that all diagonal values become +1 or -1 */
8051     Vec diag;
8052 
8053     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8054     PetscCall(MatGetDiagonal(Gmat, diag));
8055     PetscCall(VecReciprocal(diag));
8056     PetscCall(VecSqrtAbs(diag));
8057     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8058     PetscCall(VecDestroy(&diag));
8059   }
8060   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8061   if (filter >= 0) {
8062     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8063     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8064   }
8065   *a_Gmat = Gmat;
8066   PetscFunctionReturn(PETSC_SUCCESS);
8067 }
8068 
8069 /*
8070     Special version for direct calls from Fortran
8071 */
8072 
8073 /* Change these macros so they can be used in a void function */
8074 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8075 #undef PetscCall
8076 #define PetscCall(...) \
8077   do { \
8078     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8079     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8080       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8081       return; \
8082     } \
8083   } while (0)
8084 
8085 #undef SETERRQ
8086 #define SETERRQ(comm, ierr, ...) \
8087   do { \
8088     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8089     return; \
8090   } while (0)
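/* With these definitions, a failing call inside the void Fortran stub below stores the error in
   *_ierr and returns instead of propagating a PetscErrorCode; illustratively,
     PetscCall(MatSeqAIJGetArray(A, &aa));
   expands to
     do { PetscErrorCode ierr_msv_mpiaij = MatSeqAIJGetArray(A, &aa);
          if (PetscUnlikely(ierr_msv_mpiaij)) { *_ierr = PetscError(...); return; } } while (0);
*/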
8091 
8092 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8093   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8094 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8095   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8096 #else
8097 #endif
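/* Fortran binding called directly (not through the MatSetValues() dispatch); the body follows the
   same logic as MatSetValues_MPIAIJ() but reports errors through *_ierr via the redefined macros
   above */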
8098 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8099 {
8100   Mat         mat = *mmat;
8101   PetscInt    m = *mm, n = *mn;
8102   InsertMode  addv = *maddv;
8103   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8104   PetscScalar value;
8105 
8106   MatCheckPreallocated(mat, 1);
8107   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8108   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8109   {
8110     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8111     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8112     PetscBool roworiented = aij->roworiented;
8113 
8114     /* Some Variables required in the macro */
8115     Mat         A     = aij->A;
8116     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8117     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8118     MatScalar  *aa;
8119     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8120     Mat         B                 = aij->B;
8121     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8122     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8123     MatScalar  *ba;
8124     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8125      * cannot use "#if defined" inside a macro. */
8126     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8127 
8128     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8129     PetscInt   nonew = a->nonew;
8130     MatScalar *ap1, *ap2;
8131 
8132     PetscFunctionBegin;
8133     PetscCall(MatSeqAIJGetArray(A, &aa));
8134     PetscCall(MatSeqAIJGetArray(B, &ba));
8135     for (i = 0; i < m; i++) {
8136       if (im[i] < 0) continue;
8137       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8138       if (im[i] >= rstart && im[i] < rend) {
8139         row      = im[i] - rstart;
8140         lastcol1 = -1;
8141         rp1      = aj + ai[row];
8142         ap1      = aa + ai[row];
8143         rmax1    = aimax[row];
8144         nrow1    = ailen[row];
8145         low1     = 0;
8146         high1    = nrow1;
8147         lastcol2 = -1;
8148         rp2      = bj + bi[row];
8149         ap2      = ba + bi[row];
8150         rmax2    = bimax[row];
8151         nrow2    = bilen[row];
8152         low2     = 0;
8153         high2    = nrow2;
8154 
8155         for (j = 0; j < n; j++) {
8156           if (roworiented) value = v[i * n + j];
8157           else value = v[i + j * m];
8158           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8159           if (in[j] >= cstart && in[j] < cend) {
8160             col = in[j] - cstart;
8161             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8162           } else if (in[j] < 0) continue;
8163           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8164             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8165           } else {
8166             if (mat->was_assembled) {
8167               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8168 #if defined(PETSC_USE_CTABLE)
8169               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8170               col--;
8171 #else
8172               col = aij->colmap[in[j]] - 1;
8173 #endif
8174               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8175                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8176                 col = in[j];
8177                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8178                 B        = aij->B;
8179                 b        = (Mat_SeqAIJ *)B->data;
8180                 bimax    = b->imax;
8181                 bi       = b->i;
8182                 bilen    = b->ilen;
8183                 bj       = b->j;
8184                 rp2      = bj + bi[row];
8185                 ap2      = ba + bi[row];
8186                 rmax2    = bimax[row];
8187                 nrow2    = bilen[row];
8188                 low2     = 0;
8189                 high2    = nrow2;
8190                 bm       = aij->B->rmap->n;
8191                 ba       = b->a;
8192                 inserted = PETSC_FALSE;
8193               }
8194             } else col = in[j];
8195             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8196           }
8197         }
8198       } else if (!aij->donotstash) {
8199         if (roworiented) {
8200           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8201         } else {
8202           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8203         }
8204       }
8205     }
8206     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8207     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8208   }
8209   PetscFunctionReturnVoid();
8210 }
8211 
8212 /* Undefine these here since they were redefined above from their original definitions! No
8213  * other PETSc functions should be defined past this point, as it is impossible to recover the
8214  * original definitions */
8215 #undef PetscCall
8216 #undef SETERRQ
8217