xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9fe822445bcdd45fb93170ff68fa7403d3f52f09)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
  32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic() */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
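/*
   Builds a sequential copy of the locally owned rows with MatMPIAIJGetLocalMat() and returns
   that copy's ia/ja arrays. The copy is composed on A, which keeps a reference alive past the
   MatDestroy() below so that MatRestoreRowIJ_MPIAIJ() can retrieve and release it later.
*/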
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118    for communicators controlling multiple processes.  It is recommended that you call both of
119    the above preallocation routines for simplicity.
120 
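   Example Usage (a minimal sketch; creation of `comm` and the sizes and nonzero counts is assumed):
.vb
  Mat A;
  MatCreate(comm, &A);
  MatSetSizes(A, m, n, M, N);
  MatSetType(A, MATAIJ);
  /* call both; the one matching the actual type is used, the other is ignored */
  MatSeqAIJSetPreallocation(A, nz, NULL);
  MatMPIAIJSetPreallocation(A, dnz, NULL, onz, NULL);
.ve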
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
128   automatically switches over to use inodes when enough of them exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
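   Example Usage (a sketch; assumes the program calls `MatSetFromOptions()` so the type can be chosen at run time):
.vb
  MatCreate(comm, &A);
  MatSetSizes(A, m, n, M, N);
  MatSetFromOptions(A);   /* then run with -mat_type aijcrl */
.ve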
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from that of the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287   PetscMPIInt        in;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   PetscCall(PetscMPIIntCast(n, &in));
313   if (type == NORM_INFINITY) {
314     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
315   } else {
316     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
317   }
318   PetscCall(PetscFree(work));
319   if (type == NORM_2) {
320     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
321   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
322     for (i = 0; i < n; i++) reductions[i] /= m;
323   }
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
328 {
329   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
330   IS              sis, gis;
331   const PetscInt *isis, *igis;
332   PetscInt        n, *iis, nsis, ngis, rstart, i;
333 
334   PetscFunctionBegin;
335   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
336   PetscCall(MatFindNonzeroRows(a->B, &gis));
337   PetscCall(ISGetSize(gis, &ngis));
338   PetscCall(ISGetSize(sis, &nsis));
339   PetscCall(ISGetIndices(sis, &isis));
340   PetscCall(ISGetIndices(gis, &igis));
341 
342   PetscCall(PetscMalloc1(ngis + nsis, &iis));
343   PetscCall(PetscArraycpy(iis, igis, ngis));
344   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
345   n = ngis + nsis;
346   PetscCall(PetscSortRemoveDupsInt(&n, iis));
347   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
348   for (i = 0; i < n; i++) iis[i] += rstart;
349   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
350 
351   PetscCall(ISRestoreIndices(sis, &isis));
352   PetscCall(ISRestoreIndices(gis, &igis));
353   PetscCall(ISDestroy(&sis));
354   PetscCall(ISDestroy(&gis));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*
359   Local utility routine that creates a mapping from the global column
360 number to the local number in the off-diagonal part of the local
361 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
362 a slightly higher hash table cost; without it, it is not scalable (each process
363 has an order-N integer array) but is fast to access.
364 */
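/*
   Illustrative example (not code from this file): if garray = {3, 8, 12}, then off-diagonal
   local column 0 holds global column 3, local 1 holds global 8, and local 2 holds global 12.
   The colmap stores the inverse shifted by one, so that 0 can mean "not present": colmap[3] = 1,
   colmap[8] = 2, colmap[12] = 3, matching the i + 1 stored below.
*/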
365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
366 {
367   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
368   PetscInt    n   = aij->B->cmap->n, i;
369 
370   PetscFunctionBegin;
371   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
372 #if defined(PETSC_USE_CTABLE)
373   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
374   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
375 #else
376   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
377   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
378 #endif
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
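/*
   The two macros below insert a single value at (row, col) of the diagonal (A) or
   off-diagonal (B) block: a binary search narrows the search window within the sorted row,
   a linear scan locates the column, and if the entry is new (and insertion is allowed) all
   later entries in the row are shifted up to make room for it.
*/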
382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
383   do { \
384     if (col <= lastcol1) low1 = 0; \
385     else high1 = nrow1; \
386     lastcol1 = col; \
387     while (high1 - low1 > 5) { \
388       t = (low1 + high1) / 2; \
389       if (rp1[t] > col) high1 = t; \
390       else low1 = t; \
391     } \
392     for (_i = low1; _i < high1; _i++) { \
393       if (rp1[_i] > col) break; \
394       if (rp1[_i] == col) { \
395         if (addv == ADD_VALUES) { \
396           ap1[_i] += value; \
397           /* Not sure whether PetscLogFlops() will slow down the code or not */ \
398           (void)PetscLogFlops(1.0); \
399         } else ap1[_i] = value; \
400         goto a_noinsert; \
401       } \
402     } \
403     if (value == 0.0 && ignorezeroentries && row != col) { \
404       low1  = 0; \
405       high1 = nrow1; \
406       goto a_noinsert; \
407     } \
408     if (nonew == 1) { \
409       low1  = 0; \
410       high1 = nrow1; \
411       goto a_noinsert; \
412     } \
413     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
414     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
415     N = nrow1++ - 1; \
416     a->nz++; \
417     high1++; \
418     /* shift up all the later entries in this row */ \
419     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
420     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
421     rp1[_i] = col; \
422     ap1[_i] = value; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467   b_noinsert:; \
468     bilen[row] = nrow2; \
469   } while (0)
470 
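/*
   Replaces an entire locally owned row. The input array v must hold the full row ordered by
   global column: first the off-diagonal (B) entries left of the diagonal block, then the
   diagonal-block (A) entries, then the remaining off-diagonal entries, which is exactly the
   order in which the three segments are copied out below.
*/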
471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
472 {
473   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
474   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
475   PetscInt     l, *garray                         = mat->garray, diag;
476   PetscScalar *aa, *ba;
477 
478   PetscFunctionBegin;
479   /* code only works for square matrices A */
480 
481   /* find size of row to the left of the diagonal part */
482   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
483   row = row - diag;
484   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
485     if (garray[b->j[b->i[row] + l]] > diag) break;
486   }
487   if (l) {
488     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
489     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
490     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
491   }
492 
493   /* diagonal part */
494   if (a->i[row + 1] - a->i[row]) {
495     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
496     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
497     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
498   }
499 
500   /* right of diagonal part */
501   if (b->i[row + 1] - b->i[row] - l) {
502     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
503     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
504     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
505   }
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
512   PetscScalar value = 0.0;
513   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
514   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
515   PetscBool   roworiented = aij->roworiented;
516 
517   /* Some variables required by the macros */
518   Mat         A     = aij->A;
519   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
520   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
521   PetscBool   ignorezeroentries = a->ignorezeroentries;
522   Mat         B                 = aij->B;
523   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
524   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
525   MatScalar  *aa, *ba;
526   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
527   PetscInt    nonew;
528   MatScalar  *ap1, *ap2;
529 
530   PetscFunctionBegin;
531   PetscCall(MatSeqAIJGetArray(A, &aa));
532   PetscCall(MatSeqAIJGetArray(B, &ba));
533   for (i = 0; i < m; i++) {
534     if (im[i] < 0) continue;
535     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
536     if (im[i] >= rstart && im[i] < rend) {
537       row      = im[i] - rstart;
538       lastcol1 = -1;
539       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
540       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
541       rmax1    = aimax[row];
542       nrow1    = ailen[row];
543       low1     = 0;
544       high1    = nrow1;
545       lastcol2 = -1;
546       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
547       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
548       rmax2    = bimax[row];
549       nrow2    = bilen[row];
550       low2     = 0;
551       high2    = nrow2;
552 
553       for (j = 0; j < n; j++) {
554         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
555         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
556         if (in[j] >= cstart && in[j] < cend) {
557           col   = in[j] - cstart;
558           nonew = a->nonew;
559           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
560         } else if (in[j] < 0) {
561           continue;
562         } else {
563           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
564           if (mat->was_assembled) {
565             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
566 #if defined(PETSC_USE_CTABLE)
567             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
568             col--;
569 #else
570             col = aij->colmap[in[j]] - 1;
571 #endif
572             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
573               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
574               col = in[j];
575               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
576               B     = aij->B;
577               b     = (Mat_SeqAIJ *)B->data;
578               bimax = b->imax;
579               bi    = b->i;
580               bilen = b->ilen;
581               bj    = b->j;
582               ba    = b->a;
583               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
584               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
585               rmax2 = bimax[row];
586               nrow2 = bilen[row];
587               low2  = 0;
588               high2 = nrow2;
589               bm    = aij->B->rmap->n;
590               ba    = b->a;
591             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
592               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
593                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
594               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
595             }
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
599         }
600       }
601     } else {
602       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         } else {
608           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
609         }
610       }
611     }
612   }
613   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
614   PetscCall(MatSeqAIJRestoreArray(B, &ba));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
621     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
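
/*
   A sketch of the expected call sequence (assuming the matrix was preallocated so that the
   local row pointers already match the CSR structure in mat_i/mat_j):

     MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(mat, mat_j, mat_i);  // set j and ilen
     MatSetValues_MPIAIJ_CopyFromCSRFormat(mat, mat_j, mat_i, mat_a);    // copy the values
     MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY);
*/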
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
757   PetscMPIInt  n;
758   PetscInt     i, j, rstart, ncols, flg;
759   PetscInt    *row, *col;
760   PetscBool    other_disassembled;
761   PetscScalar *val;
762 
763   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
764 
765   PetscFunctionBegin;
766   if (!aij->donotstash && !mat->nooffprocentries) {
767     while (1) {
768       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
769       if (!flg) break;
770 
771       for (i = 0; i < n;) {
772         /* Now identify the consecutive vals belonging to the same row */
773         for (j = i, rstart = row[j]; j < n; j++) {
774           if (row[j] != rstart) break;
775         }
776         if (j < n) ncols = j - i;
777         else ncols = n - i;
778         /* Now assemble all these values with a single function call */
779         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
780         i = j;
781       }
782     }
783     PetscCall(MatStashScatterEnd_Private(&mat->stash));
784   }
785 #if defined(PETSC_HAVE_DEVICE)
786   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
787   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
788   if (mat->boundtocpu) {
789     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
790     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
791   }
792 #endif
793   PetscCall(MatAssemblyBegin(aij->A, mode));
794   PetscCall(MatAssemblyEnd(aij->A, mode));
795 
796   /* determine if any process has disassembled; if so we must
797      also disassemble ourselves, in order that we may reassemble */
798   /*
799      if the nonzero structure of submatrix B cannot change then we know that
800      no process disassembled, thus we can skip this step
801   */
802   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
803     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
804     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
805       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
806     }
807   }
808   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
809   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
810 #if defined(PETSC_HAVE_DEVICE)
811   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
812 #endif
813   PetscCall(MatAssemblyBegin(aij->B, mode));
814   PetscCall(MatAssemblyEnd(aij->B, mode));
815 
816   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
817 
818   aij->rowvalues = NULL;
819 
820   PetscCall(VecDestroy(&aij->diag));
821 
822   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
823   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
824     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
825     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
826   }
827 #if defined(PETSC_HAVE_DEVICE)
828   mat->offloadmask = PETSC_OFFLOAD_BOTH;
829 #endif
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
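/*
   Zeros the locally owned rows among rows[] (global indices). When diag is nonzero and the
   row and column layouts are congruent, the diagonal block sets the diagonal entries
   directly; otherwise the diagonal entries are inserted with MatSetValues(), which may
   allocate new nonzeros unless the nonzero pattern is kept.
*/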
843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
844 {
845   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
846   PetscInt   *lrows;
847   PetscInt    r, len;
848   PetscBool   cong;
849 
850   PetscFunctionBegin;
851   /* get locally owned rows */
852   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
853   PetscCall(MatHasCongruentLayouts(A, &cong));
854   /* fix right-hand side if needed */
855   if (x && b) {
856     const PetscScalar *xx;
857     PetscScalar       *bb;
858 
859     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
860     PetscCall(VecGetArrayRead(x, &xx));
861     PetscCall(VecGetArray(b, &bb));
862     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
863     PetscCall(VecRestoreArrayRead(x, &xx));
864     PetscCall(VecRestoreArray(b, &bb));
865   }
866 
867   if (diag != 0.0 && cong) {
868     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
869     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
870   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
871     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
872     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
873     PetscInt    nnwA, nnwB;
874     PetscBool   nnzA, nnzB;
875 
876     nnwA = aijA->nonew;
877     nnwB = aijB->nonew;
878     nnzA = aijA->keepnonzeropattern;
879     nnzB = aijB->keepnonzeropattern;
880     if (!nnzA) {
881       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
882       aijA->nonew = 0;
883     }
884     if (!nnzB) {
885       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
886       aijB->nonew = 0;
887     }
888     /* Must zero here before the next loop */
889     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
890     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       if (row >= A->cmap->N) continue;
894       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
895     }
896     aijA->nonew = nnwA;
897     aijB->nonew = nnwB;
898   } else {
899     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
900     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
901   }
902   PetscCall(PetscFree(lrows));
903   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
904   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
905 
906   /* only change matrix nonzero state if pattern was allowed to be changed */
907   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
908     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
909     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
910   }
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
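/*
   Proceeds in three stages: a PetscSF maps the requested global rows to their owning
   processes, MatZeroRowsColumns() on l->A handles the diagonal block, and a 0/1 mask
   vector scattered through l->Mvctx marks the ghost columns of the off-diagonal block
   that must be zeroed (adjusting the right-hand side b along the way when x and b are given).
*/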
914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
915 {
916   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
917   PetscInt           n = A->rmap->n;
918   PetscInt           i, j, r, m, len = 0;
919   PetscInt          *lrows, *owners = A->rmap->range;
920   PetscMPIInt        p = 0;
921   PetscSFNode       *rrows;
922   PetscSF            sf;
923   const PetscScalar *xx;
924   PetscScalar       *bb, *mask, *aij_a;
925   Vec                xmask, lmask;
926   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
927   const PetscInt    *aj, *ii, *ridx;
928   PetscScalar       *aa;
929 
930   PetscFunctionBegin;
931   /* Create SF where leaves are input rows and roots are owned rows */
932   PetscCall(PetscMalloc1(n, &lrows));
933   for (r = 0; r < n; ++r) lrows[r] = -1;
934   PetscCall(PetscMalloc1(N, &rrows));
935   for (r = 0; r < N; ++r) {
936     const PetscInt idx = rows[r];
937     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
938     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
939       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
940     }
941     rrows[r].rank  = p;
942     rrows[r].index = rows[r] - owners[p];
943   }
944   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
945   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
946   /* Collect flags for rows to be zeroed */
947   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
948   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
949   PetscCall(PetscSFDestroy(&sf));
950   /* Compress and put in row numbers */
951   for (r = 0; r < n; ++r)
952     if (lrows[r] >= 0) lrows[len++] = r;
953   /* zero diagonal part of matrix */
954   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
955   /* handle off-diagonal part of matrix */
956   PetscCall(MatCreateVecs(A, &xmask, NULL));
957   PetscCall(VecDuplicate(l->lvec, &lmask));
958   PetscCall(VecGetArray(xmask, &bb));
959   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
960   PetscCall(VecRestoreArray(xmask, &bb));
961   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
962   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
963   PetscCall(VecDestroy(&xmask));
964   if (x && b) { /* this code is buggy when the row and column layouts don't match */
965     PetscBool cong;
966 
967     PetscCall(MatHasCongruentLayouts(A, &cong));
968     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
969     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
970     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
971     PetscCall(VecGetArrayRead(l->lvec, &xx));
972     PetscCall(VecGetArray(b, &bb));
973   }
974   PetscCall(VecGetArray(lmask, &mask));
975   /* remove zeroed rows of off-diagonal matrix */
976   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
977   ii = aij->i;
978   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
979   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
980   if (aij->compressedrow.use) {
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i = 0; i < m; i++) {
985       n  = ii[i + 1] - ii[i];
986       aj = aij->j + ii[i];
987       aa = aij_a + ii[i];
988 
989       for (j = 0; j < n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           if (b) bb[*ridx] -= *aa * xx[*aj];
992           *aa = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001     for (i = 0; i < m; i++) {
1002       n  = ii[i + 1] - ii[i];
1003       aj = aij->j + ii[i];
1004       aa = aij_a + ii[i];
1005       for (j = 0; j < n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           if (b) bb[i] -= *aa * xx[*aj];
1008           *aa = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015   if (x && b) {
1016     PetscCall(VecRestoreArray(b, &bb));
1017     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1018   }
1019   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1020   PetscCall(VecRestoreArray(lmask, &mask));
1021   PetscCall(VecDestroy(&lmask));
1022   PetscCall(PetscFree(lrows));
1023 
1024   /* only change matrix nonzero state if pattern was allowed to be changed */
1025   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1026     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1027     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1028   }
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
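  /* Overlap communication with computation: start gathering the off-process entries of xx
     into a->lvec, apply the diagonal block while the scatter is in flight, then complete the
     scatter and add the off-diagonal block's contribution. */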
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1086 {
1087   MPI_Comm    comm;
1088   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1089   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1090   IS          Me, Notme;
1091   PetscInt    M, N, first, last, *notme, i;
1092   PetscBool   lf;
1093   PetscMPIInt size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1098   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1099   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1100   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1101   PetscCallMPI(MPI_Comm_size(comm, &size));
1102   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1103 
1104   /* Hard test: off-diagonal block. This requires a call to MatCreateSubMatrices(). */
1105   PetscCall(MatGetSize(Amat, &M, &N));
1106   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1107   PetscCall(PetscMalloc1(N - last + first, &notme));
1108   for (i = 0; i < first; i++) notme[i] = i;
1109   for (i = last; i < M; i++) notme[i - last + first] = i;
1110   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1111   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1112   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1113   Aoff = Aoffs[0];
1114   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1115   Boff = Boffs[0];
1116   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1117   PetscCall(MatDestroyMatrices(1, &Aoffs));
1118   PetscCall(MatDestroyMatrices(1, &Boffs));
1119   PetscCall(ISDestroy(&Me));
1120   PetscCall(ISDestroy(&Notme));
1121   PetscCall(PetscFree(notme));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
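/*
   The binary format written here is: a four-entry header (MAT_FILE_CLASSID, M, N, global
   nonzero count), the number of nonzeros in each row, all column indices listed row by row
   with each row sorted by global column, and finally the matching nonzero values.
*/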
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1246 {
1247   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1248   PetscMPIInt       rank = aij->rank, size = aij->size;
1249   PetscBool         isdraw, iascii, isbinary;
1250   PetscViewer       sviewer;
1251   PetscViewerFormat format;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1255   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1256   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1257   if (iascii) {
1258     PetscCall(PetscViewerGetFormat(viewer, &format));
1259     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1260       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1261       PetscCall(PetscMalloc1(size, &nz));
1262       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1263       for (i = 0; i < (PetscInt)size; i++) {
1264         nmax = PetscMax(nmax, nz[i]);
1265         nmin = PetscMin(nmin, nz[i]);
1266         navg += nz[i];
1267       }
1268       PetscCall(PetscFree(nz));
1269       navg = navg / size;
1270       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1271       PetscFunctionReturn(PETSC_SUCCESS);
1272     }
1273     PetscCall(PetscViewerGetFormat(viewer, &format));
1274     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1275       MatInfo   info;
1276       PetscInt *inodes = NULL;
1277 
1278       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1279       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1280       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1281       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1282       if (!inodes) {
1283         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1284                                                      (double)info.memory));
1285       } else {
1286         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1287                                                      (double)info.memory));
1288       }
1289       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1292       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1293       PetscCall(PetscViewerFlush(viewer));
1294       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1295       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1296       PetscCall(VecScatterView(aij->Mvctx, viewer));
1297       PetscFunctionReturn(PETSC_SUCCESS);
1298     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1299       PetscInt inodecount, inodelimit, *inodes;
1300       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1301       if (inodes) {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1303       } else {
1304         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1305       }
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1308       PetscFunctionReturn(PETSC_SUCCESS);
1309     }
1310   } else if (isbinary) {
1311     if (size == 1) {
1312       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1313       PetscCall(MatView(aij->A, viewer));
1314     } else {
1315       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1316     }
1317     PetscFunctionReturn(PETSC_SUCCESS);
1318   } else if (iascii && size == 1) {
1319     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1320     PetscCall(MatView(aij->A, viewer));
1321     PetscFunctionReturn(PETSC_SUCCESS);
1322   } else if (isdraw) {
1323     PetscDraw draw;
1324     PetscBool isnull;
1325     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1326     PetscCall(PetscDrawIsNull(draw, &isnull));
1327     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1328   }
1329 
1330   { /* assemble the entire matrix onto first processor */
1331     Mat A = NULL, Av;
1332     IS  isrow, iscol;
1333 
1334     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1335     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1336     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1337     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1338     /* The commented-out code below uses MatCreateSubMatrices() instead */
1339     /*
1340     Mat *AA, A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1344     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1345     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1346     if (rank == 0) {
1347        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1348        A    = AA[0];
1349        Av   = AA[0];
1350     }
1351     PetscCall(MatDestroySubMatrices(1,&AA));
1352 */
1353     PetscCall(ISDestroy(&iscol));
1354     PetscCall(ISDestroy(&isrow));
1355     /*
1356        Everyone has to make this call, even though only rank 0 views the matrix, since the
1357        graphics waits are synchronized across all processors that share the PetscDraw object
1358     */
1359     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1360     if (rank == 0) {
1361       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1362       PetscCall(MatView_SeqAIJ(Av, sviewer));
1363     }
1364     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1365     PetscCall(MatDestroy(&A));
1366   }
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
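/*
   Processor-local SOR: each outer iteration scatters the current x to obtain its
   off-process entries, folds them into the right-hand side as bb1 = bb - B*x, and
   then runs the sequential SOR kernel on the diagonal block A with bb1. Truly
   global parallel SOR orderings are not supported (see the SETERRQ below).
*/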
1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1384 {
1385   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1386   Vec         bb1 = NULL;
1387   PetscBool   hasop;
1388 
1389   PetscFunctionBegin;
1390   if (flag == SOR_APPLY_UPPER) {
1391     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1392     PetscFunctionReturn(PETSC_SUCCESS);
1393   }
1394 
1395   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1396 
1397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1398     if (flag & SOR_ZERO_INITIAL_GUESS) {
1399       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1400       its--;
1401     }
1402 
1403     while (its--) {
1404       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1405       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1406 
1407       /* update rhs: bb1 = bb - B*x */
1408       PetscCall(VecScale(mat->lvec, -1.0));
1409       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1410 
1411       /* local sweep */
1412       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1413     }
1414   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_EISENSTAT) {
1447     Vec xx1;
1448 
1449     PetscCall(VecDuplicate(bb, &xx1));
1450     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1451 
1452     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454     if (!mat->diag) {
1455       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1456       PetscCall(MatGetDiagonal(matin, mat->diag));
1457     }
1458     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1459     if (hasop) {
1460       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1461     } else {
1462       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1463     }
1464     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1465 
1466     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1467 
1468     /* local sweep */
1469     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1470     PetscCall(VecAXPY(xx, 1.0, xx1));
1471     PetscCall(VecDestroy(&xx1));
1472   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1473 
1474   PetscCall(VecDestroy(&bb1));
1475 
1476   matin->factorerrortype = mat->A->factorerrortype;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1481 {
1482   Mat             aA, aB, Aperm;
1483   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1484   PetscScalar    *aa, *ba;
1485   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1486   PetscSF         rowsf, sf;
1487   IS              parcolp = NULL;
1488   PetscBool       done;
1489 
1490   PetscFunctionBegin;
1491   PetscCall(MatGetLocalSize(A, &m, &n));
1492   PetscCall(ISGetIndices(rowp, &rwant));
1493   PetscCall(ISGetIndices(colp, &cwant));
1494   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1495 
1496   /* Invert row permutation to find out where my rows should go */
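  /* Leaf i (new global row rstart + i) targets root rwant[i], the old row it pulls from;
     reducing the identity rstart + i with MPI_REPLACE thus leaves in rdest[k] the
     destination global row at which old local row k of A lands. */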
1497   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1498   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1499   PetscCall(PetscSFSetFromOptions(rowsf));
1500   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1501   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1502   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1503 
1504   /* Invert column permutation to find out where my columns should go */
1505   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1506   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1507   PetscCall(PetscSFSetFromOptions(sf));
1508   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1509   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1510   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1511   PetscCall(PetscSFDestroy(&sf));
1512 
1513   PetscCall(ISRestoreIndices(rowp, &rwant));
1514   PetscCall(ISRestoreIndices(colp, &cwant));
1515   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1516 
1517   /* Find out where my gcols should go */
1518   PetscCall(MatGetSize(aB, NULL, &ng));
1519   PetscCall(PetscMalloc1(ng, &gcdest));
1520   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1521   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1522   PetscCall(PetscSFSetFromOptions(sf));
1523   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1524   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1525   PetscCall(PetscSFDestroy(&sf));
1526 
1527   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1528   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1529   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1530   for (i = 0; i < m; i++) {
1531     PetscInt    row = rdest[i];
1532     PetscMPIInt rowner;
1533     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1534     for (j = ai[i]; j < ai[i + 1]; j++) {
1535       PetscInt    col = cdest[aj[j]];
1536       PetscMPIInt cowner;
1537       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1538       if (rowner == cowner) dnnz[i]++;
1539       else onnz[i]++;
1540     }
1541     for (j = bi[i]; j < bi[i + 1]; j++) {
1542       PetscInt    col = gcdest[bj[j]];
1543       PetscMPIInt cowner;
1544       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1545       if (rowner == cowner) dnnz[i]++;
1546       else onnz[i]++;
1547     }
1548   }
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1551   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1552   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1553   PetscCall(PetscSFDestroy(&rowsf));
1554 
1555   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1556   PetscCall(MatSeqAIJGetArray(aA, &aa));
1557   PetscCall(MatSeqAIJGetArray(aB, &ba));
1558   for (i = 0; i < m; i++) {
1559     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1560     PetscInt  j0, rowlen;
1561     rowlen = ai[i + 1] - ai[i];
1562     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
1563       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1564       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1565     }
1566     rowlen = bi[i + 1] - bi[i];
1567     for (j0 = j = 0; j < rowlen; j0 = j) {
1568       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1569       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1570     }
1571   }
1572   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1574   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1575   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1576   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1577   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1578   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1579   PetscCall(PetscFree3(work, rdest, cdest));
1580   PetscCall(PetscFree(gcdest));
1581   if (parcolp) PetscCall(ISDestroy(&colp));
1582   *B = Aperm;
1583   PetscFunctionReturn(PETSC_SUCCESS);
1584 }
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1649 {
1650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1651 
1652   PetscFunctionBegin;
1653   switch (op) {
1654   case MAT_NEW_NONZERO_LOCATIONS:
1655   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1656   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1657   case MAT_KEEP_NONZERO_PATTERN:
1658   case MAT_NEW_NONZERO_LOCATION_ERR:
1659   case MAT_USE_INODES:
1660   case MAT_IGNORE_ZERO_ENTRIES:
1661   case MAT_FORM_EXPLICIT_TRANSPOSE:
1662     MatCheckPreallocated(A, 1);
1663     PetscCall(MatSetOption(a->A, op, flg));
1664     PetscCall(MatSetOption(a->B, op, flg));
1665     break;
1666   case MAT_ROW_ORIENTED:
1667     MatCheckPreallocated(A, 1);
1668     a->roworiented = flg;
1669 
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_FORCE_DIAGONAL_ENTRIES:
1674   case MAT_SORTED_FULL:
1675     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1676     break;
1677   case MAT_IGNORE_OFF_PROC_ENTRIES:
1678     a->donotstash = flg;
1679     break;
1680   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1681   case MAT_SPD:
1682   case MAT_SYMMETRIC:
1683   case MAT_STRUCTURALLY_SYMMETRIC:
1684   case MAT_HERMITIAN:
1685   case MAT_SYMMETRY_ETERNAL:
1686   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1687   case MAT_SPD_ETERNAL:
1688     /* if the diagonal block is square it inherits some of the properties above */
1689     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1690     break;
1691   case MAT_SUBMAT_SINGLEIS:
1692     A->submat_singleis = flg;
1693     break;
1694   case MAT_STRUCTURE_ONLY:
1695     /* The option is handled directly by MatSetOption() */
1696     break;
1697   default:
1698     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1699   }
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
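/*
   Rows are returned with globally sorted column indices by interleaving the two stored
   parts: off-diagonal columns below cstart first, then the diagonal block (shifted by
   cstart), then the remaining off-diagonal columns. For example, with cstart = 4, local
   diagonal columns {0,2} and off-diagonal global columns {1,9}, the caller sees the row
   as columns {1, 4, 6, 9}.
*/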
1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1704 {
1705   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1706   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1707   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1708   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1709   PetscInt    *cmap, *idx_p;
1710 
1711   PetscFunctionBegin;
1712   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1713   mat->getrowactive = PETSC_TRUE;
1714 
1715   if (!mat->rowvalues && (idx || v)) {
1716     /*
1717         allocate enough space to hold information from the longest row.
1718     */
1719     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1720     PetscInt    max = 1, tmp;
1721     for (i = 0; i < matin->rmap->n; i++) {
1722       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1723       if (max < tmp) max = tmp;
1724     }
1725     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1726   }
1727 
1728   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1729   lrow = row - rstart;
1730 
1731   pvA = &vworkA;
1732   pcA = &cworkA;
1733   pvB = &vworkB;
1734   pcB = &cworkB;
1735   if (!v) {
1736     pvA = NULL;
1737     pvB = NULL;
1738   }
1739   if (!idx) {
1740     pcA = NULL;
1741     if (!v) pcB = NULL;
1742   }
1743   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1744   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1745   nztot = nzA + nzB;
1746 
1747   cmap = mat->garray;
1748   if (v || idx) {
1749     if (nztot) {
1750       /* Sort by increasing column numbers, assuming A and B already sorted */
1751       PetscInt imark = -1;
1752       if (v) {
1753         *v = v_p = mat->rowvalues;
1754         for (i = 0; i < nzB; i++) {
1755           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1756           else break;
1757         }
1758         imark = i;
1759         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1760         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1761       }
1762       if (idx) {
1763         *idx = idx_p = mat->rowindices;
1764         if (imark > -1) {
1765           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1766         } else {
1767           for (i = 0; i < nzB; i++) {
1768             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1769             else break;
1770           }
1771           imark = i;
1772         }
1773         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1774         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1775       }
1776     } else {
1777       if (idx) *idx = NULL;
1778       if (v) *v = NULL;
1779     }
1780   }
1781   *nz = nztot;
1782   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1783   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1784   PetscFunctionReturn(PETSC_SUCCESS);
1785 }
1786 
1787 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1788 {
1789   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1790 
1791   PetscFunctionBegin;
1792   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1793   aij->getrowactive = PETSC_FALSE;
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1798 {
1799   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1800   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1801   PetscInt         i, j, cstart = mat->cmap->rstart;
1802   PetscReal        sum = 0.0;
1803   const MatScalar *v, *amata, *bmata;
1804   PetscMPIInt      iN;
1805 
1806   PetscFunctionBegin;
1807   if (aij->size == 1) {
1808     PetscCall(MatNorm(aij->A, type, norm));
1809   } else {
1810     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1811     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1812     if (type == NORM_FROBENIUS) {
1813       v = amata;
1814       for (i = 0; i < amat->nz; i++) {
1815         sum += PetscRealPart(PetscConj(*v) * (*v));
1816         v++;
1817       }
1818       v = bmata;
1819       for (i = 0; i < bmat->nz; i++) {
1820         sum += PetscRealPart(PetscConj(*v) * (*v));
1821         v++;
1822       }
1823       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1824       *norm = PetscSqrtReal(*norm);
1825       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1826     } else if (type == NORM_1) { /* max column norm */
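      /* ||A||_1 = max_j sum_i |a_ij|: accumulate the per-column absolute sums of both
         local parts into an array of global length, allreduce, then take the maximum */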
1827       PetscReal *tmp, *tmp2;
1828       PetscInt  *jj, *garray = aij->garray;
1829       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1830       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1831       *norm = 0.0;
1832       v     = amata;
1833       jj    = amat->j;
1834       for (j = 0; j < amat->nz; j++) {
1835         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1836         v++;
1837       }
1838       v  = bmata;
1839       jj = bmat->j;
1840       for (j = 0; j < bmat->nz; j++) {
1841         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1842         v++;
1843       }
1844       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1845       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1846       for (j = 0; j < mat->cmap->N; j++) {
1847         if (tmp2[j] > *norm) *norm = tmp2[j];
1848       }
1849       PetscCall(PetscFree(tmp));
1850       PetscCall(PetscFree(tmp2));
1851       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1852     } else if (type == NORM_INFINITY) { /* max row norm */
1853       PetscReal ntemp = 0.0;
1854       for (j = 0; j < aij->A->rmap->n; j++) {
1855         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1856         sum = 0.0;
1857         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v);
1859           v++;
1860         }
1861         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1862         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1863           sum += PetscAbsScalar(*v);
1864           v++;
1865         }
1866         if (sum > ntemp) ntemp = sum;
1867       }
1868       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1869       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1870     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1871     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1872     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1873   }
1874   PetscFunctionReturn(PETSC_SUCCESS);
1875 }
1876 
1877 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1878 {
1879   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1880   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1881   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1882   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1883   Mat              B, A_diag, *B_diag;
1884   const MatScalar *pbv, *bv;
1885 
1886   PetscFunctionBegin;
1887   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1888   ma = A->rmap->n;
1889   na = A->cmap->n;
1890   mb = a->B->rmap->n;
1891   nb = a->B->cmap->n;
1892   ai = Aloc->i;
1893   aj = Aloc->j;
1894   bi = Bloc->i;
1895   bj = Bloc->j;
1896   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1897     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1898     PetscSFNode         *oloc;
1899     PETSC_UNUSED PetscSF sf;
1900 
1901     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1902     /* compute d_nnz for preallocation */
1903     PetscCall(PetscArrayzero(d_nnz, na));
1904     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1905     /* compute local off-diagonal contributions */
1906     PetscCall(PetscArrayzero(g_nnz, nb));
1907     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1908     /* map those to global */
1909     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1910     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1911     PetscCall(PetscSFSetFromOptions(sf));
1912     PetscCall(PetscArrayzero(o_nnz, na));
1913     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1914     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1915     PetscCall(PetscSFDestroy(&sf));
1916 
1917     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1918     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1919     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1920     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1921     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1922     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1923   } else {
1924     B = *matout;
1925     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1926   }
1927 
1928   b           = (Mat_MPIAIJ *)B->data;
1929   A_diag      = a->A;
1930   B_diag      = &b->A;
1931   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1932   A_diag_ncol = A_diag->cmap->N;
1933   B_diag_ilen = sub_B_diag->ilen;
1934   B_diag_i    = sub_B_diag->i;
1935 
1936   /* Set ilen for diagonal of B */
1937   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1938 
1939   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1940      very quickly (i.e., without using MatSetValues), because all writes are local. */
1941   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1942   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1943 
1944   /* copy over the B part */
1945   PetscCall(PetscMalloc1(bi[mb], &cols));
1946   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1947   pbv = bv;
1948   row = A->rmap->rstart;
1949   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1950   cols_tmp = cols;
1951   for (i = 0; i < mb; i++) {
1952     ncol = bi[i + 1] - bi[i];
1953     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1954     row++;
1955     if (pbv) pbv += ncol;
1956     if (cols_tmp) cols_tmp += ncol;
1957   }
1958   PetscCall(PetscFree(cols));
1959   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1960 
1961   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1962   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1963   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1964     *matout = B;
1965   } else {
1966     PetscCall(MatHeaderMerge(A, &B));
1967   }
1968   PetscFunctionReturn(PETSC_SUCCESS);
1969 }
1970 
1971 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1972 {
1973   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1974   Mat         a = aij->A, b = aij->B;
1975   PetscInt    s1, s2, s3;
1976 
1977   PetscFunctionBegin;
1978   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1979   if (rr) {
1980     PetscCall(VecGetLocalSize(rr, &s1));
1981     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1982     /* Overlap communication with computation. */
1983     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1984   }
1985   if (ll) {
1986     PetscCall(VecGetLocalSize(ll, &s1));
1987     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1988     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1989   }
1990   /* scale the diagonal block */
1991   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1992 
1993   if (rr) {
1994     /* Do a scatter end and then right scale the off-diagonal block */
1995     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1996     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1997   }
1998   PetscFunctionReturn(PETSC_SUCCESS);
1999 }
2000 
2001 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2002 {
2003   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2004 
2005   PetscFunctionBegin;
2006   PetscCall(MatSetUnfactored(a->A));
2007   PetscFunctionReturn(PETSC_SUCCESS);
2008 }
2009 
2010 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2011 {
2012   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2013   Mat         a, b, c, d;
2014   PetscBool   flg;
2015 
2016   PetscFunctionBegin;
2017   a = matA->A;
2018   b = matA->B;
2019   c = matB->A;
2020   d = matB->B;
2021 
2022   PetscCall(MatEqual(a, c, &flg));
2023   if (flg) PetscCall(MatEqual(b, d, &flg));
2024   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
2028 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2029 {
2030   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2031   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2032 
2033   PetscFunctionBegin;
2034   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2035   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2036     /* because of the column compression in the off-processor part of the matrix a->B,
2037        the number of columns in a->B and b->B may be different, hence we cannot call
2038        MatCopy() directly on the two parts. If need be, we could provide a copy more
2039        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2040        then copying the submatrices */
2041     PetscCall(MatCopy_Basic(A, B, str));
2042   } else {
2043     PetscCall(MatCopy(a->A, b->A, str));
2044     PetscCall(MatCopy(a->B, b->B, str));
2045   }
2046   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2047   PetscFunctionReturn(PETSC_SUCCESS);
2048 }
2049 
2050 /*
2051    Computes the number of nonzeros per row needed for preallocation when X and Y
2052    have different nonzero structure.
2053 */
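/*
   A hand-worked sketch: if row i of X has global columns {0, 5} and row i of Y has
   {0, 3, 5, 7}, the merged pattern is {0, 3, 5, 7}, so nnz[i] = 4; the shared
   columns 0 and 5 are counted only once.
*/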
2054 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2055 {
2056   PetscInt i, j, k, nzx, nzy;
2057 
2058   PetscFunctionBegin;
2059   /* Set the number of nonzeros in the new matrix */
2060   for (i = 0; i < m; i++) {
2061     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2062     nzx    = xi[i + 1] - xi[i];
2063     nzy    = yi[i + 1] - yi[i];
2064     nnz[i] = 0;
2065     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2066       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2067       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2068       nnz[i]++;
2069     }
2070     for (; k < nzy; k++) nnz[i]++;
2071   }
2072   PetscFunctionReturn(PETSC_SUCCESS);
2073 }
2074 
2075 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2076 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2077 {
2078   PetscInt    m = Y->rmap->N;
2079   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2080   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2081 
2082   PetscFunctionBegin;
2083   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2084   PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086 
2087 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2088 {
2089   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2090 
2091   PetscFunctionBegin;
2092   if (str == SAME_NONZERO_PATTERN) {
2093     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2094     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2095   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2096     PetscCall(MatAXPY_Basic(Y, a, X, str));
2097   } else {
2098     Mat       B;
2099     PetscInt *nnz_d, *nnz_o;
2100 
2101     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2102     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2103     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2104     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2105     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2106     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2107     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2108     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2109     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2110     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2111     PetscCall(MatHeaderMerge(Y, &B));
2112     PetscCall(PetscFree(nnz_d));
2113     PetscCall(PetscFree(nnz_o));
2114   }
2115   PetscFunctionReturn(PETSC_SUCCESS);
2116 }
2117 
2118 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2119 
2120 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2121 {
2122   PetscFunctionBegin;
2123   if (PetscDefined(USE_COMPLEX)) {
2124     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2125 
2126     PetscCall(MatConjugate_SeqAIJ(aij->A));
2127     PetscCall(MatConjugate_SeqAIJ(aij->B));
2128   }
2129   PetscFunctionReturn(PETSC_SUCCESS);
2130 }
2131 
2132 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2133 {
2134   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2135 
2136   PetscFunctionBegin;
2137   PetscCall(MatRealPart(a->A));
2138   PetscCall(MatRealPart(a->B));
2139   PetscFunctionReturn(PETSC_SUCCESS);
2140 }
2141 
2142 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2143 {
2144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2145 
2146   PetscFunctionBegin;
2147   PetscCall(MatImaginaryPart(a->A));
2148   PetscCall(MatImaginaryPart(a->B));
2149   PetscFunctionReturn(PETSC_SUCCESS);
2150 }
2151 
2152 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2153 {
2154   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2155   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2156   PetscScalar       *vv;
2157   Vec                vB, vA;
2158   const PetscScalar *va, *vb;
2159 
2160   PetscFunctionBegin;
2161   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2162   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2163 
2164   PetscCall(VecGetArrayRead(vA, &va));
2165   if (idx) {
2166     for (i = 0; i < m; i++) {
2167       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2168     }
2169   }
2170 
2171   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2172   PetscCall(PetscMalloc1(m, &idxb));
2173   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2174 
2175   PetscCall(VecGetArrayWrite(v, &vv));
2176   PetscCall(VecGetArrayRead(vB, &vb));
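  /* merge the diagonal and off-diagonal row maxima; on a tie in magnitude keep the
     smaller global column index */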
2177   for (i = 0; i < m; i++) {
2178     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2179       vv[i] = vb[i];
2180       if (idx) idx[i] = a->garray[idxb[i]];
2181     } else {
2182       vv[i] = va[i];
2183       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2184     }
2185   }
2186   PetscCall(VecRestoreArrayWrite(v, &vv));
2187   PetscCall(VecRestoreArrayRead(vA, &va));
2188   PetscCall(VecRestoreArrayRead(vB, &vb));
2189   PetscCall(PetscFree(idxb));
2190   PetscCall(VecDestroy(&vA));
2191   PetscCall(VecDestroy(&vB));
2192   PetscFunctionReturn(PETSC_SUCCESS);
2193 }
2194 
2195 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2196 {
2197   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2198   Vec         vB, vA;
2199 
2200   PetscFunctionBegin;
2201   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2202   PetscCall(MatGetRowSumAbs(a->A, vA));
2203   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2204   PetscCall(MatGetRowSumAbs(a->B, vB));
2205   PetscCall(VecAXPY(vA, 1.0, vB));
2206   PetscCall(VecDestroy(&vB));
2207   PetscCall(VecCopy(vA, v));
2208   PetscCall(VecDestroy(&vA));
2209   PetscFunctionReturn(PETSC_SUCCESS);
2210 }
2211 
2212 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2213 {
2214   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2215   PetscInt           m = A->rmap->n, n = A->cmap->n;
2216   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2217   PetscInt          *cmap = mat->garray;
2218   PetscInt          *diagIdx, *offdiagIdx;
2219   Vec                diagV, offdiagV;
2220   PetscScalar       *a, *diagA, *offdiagA;
2221   const PetscScalar *ba, *bav;
2222   PetscInt           r, j, col, ncols, *bi, *bj;
2223   Mat                B = mat->B;
2224   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2225 
2226   PetscFunctionBegin;
2227   /* When a single process holds the entire A and the other processes have no entries */
2228   if (A->cmap->N == n) {
2229     PetscCall(VecGetArrayWrite(v, &diagA));
2230     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2231     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2232     PetscCall(VecDestroy(&diagV));
2233     PetscCall(VecRestoreArrayWrite(v, &diagA));
2234     PetscFunctionReturn(PETSC_SUCCESS);
2235   } else if (n == 0) {
2236     if (m) {
2237       PetscCall(VecGetArrayWrite(v, &a));
2238       for (r = 0; r < m; r++) {
2239         a[r] = 0.0;
2240         if (idx) idx[r] = -1;
2241       }
2242       PetscCall(VecRestoreArrayWrite(v, &a));
2243     }
2244     PetscFunctionReturn(PETSC_SUCCESS);
2245   }
2246 
2247   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2249   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2250   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2251 
2252   /* Get offdiagIdx[] for implicit 0.0 */
2253   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2254   ba = bav;
2255   bi = b->i;
2256   bj = b->j;
2257   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2258   for (r = 0; r < m; r++) {
2259     ncols = bi[r + 1] - bi[r];
2260     if (ncols == A->cmap->N - n) { /* Brow is dense */
2261       offdiagA[r]   = *ba;
2262       offdiagIdx[r] = cmap[0];
2263     } else { /* Brow is sparse, so we already KNOW the minimum in absolute value is 0.0 */
2264       offdiagA[r] = 0.0;
2265 
2266       /* Find first hole in the cmap */
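      /* candidate holes are the global columns outside the diagonal block: the j-th
         candidate is j when j < cstart and j + n otherwise; the first j whose cmap[j]
         exceeds its candidate marks an implicit 0.0 */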
2267       for (j = 0; j < ncols; j++) {
2268         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2269         if (col > j && j < cstart) {
2270           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2271           break;
2272         } else if (col > j + n && j >= cstart) {
2273           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2274           break;
2275         }
2276       }
2277       if (j == ncols && ncols < A->cmap->N - n) {
2278         /* a hole is outside compressed Bcols */
2279         if (ncols == 0) {
2280           if (cstart) {
2281             offdiagIdx[r] = 0;
2282           } else offdiagIdx[r] = cend;
2283         } else { /* ncols > 0 */
2284           offdiagIdx[r] = cmap[ncols - 1] + 1;
2285           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2286         }
2287       }
2288     }
2289 
2290     for (j = 0; j < ncols; j++) {
2291       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2292         offdiagA[r]   = *ba;
2293         offdiagIdx[r] = cmap[*bj];
2294       }
2295       ba++;
2296       bj++;
2297     }
2298   }
2299 
2300   PetscCall(VecGetArrayWrite(v, &a));
2301   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2302   for (r = 0; r < m; ++r) {
2303     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2304       a[r] = diagA[r];
2305       if (idx) idx[r] = cstart + diagIdx[r];
2306     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2307       a[r] = diagA[r];
2308       if (idx) {
2309         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2310           idx[r] = cstart + diagIdx[r];
2311         } else idx[r] = offdiagIdx[r];
2312       }
2313     } else {
2314       a[r] = offdiagA[r];
2315       if (idx) idx[r] = offdiagIdx[r];
2316     }
2317   }
2318   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2319   PetscCall(VecRestoreArrayWrite(v, &a));
2320   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2321   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2322   PetscCall(VecDestroy(&diagV));
2323   PetscCall(VecDestroy(&offdiagV));
2324   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2325   PetscFunctionReturn(PETSC_SUCCESS);
2326 }
2327 
2328 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2329 {
2330   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2331   PetscInt           m = A->rmap->n, n = A->cmap->n;
2332   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2333   PetscInt          *cmap = mat->garray;
2334   PetscInt          *diagIdx, *offdiagIdx;
2335   Vec                diagV, offdiagV;
2336   PetscScalar       *a, *diagA, *offdiagA;
2337   const PetscScalar *ba, *bav;
2338   PetscInt           r, j, col, ncols, *bi, *bj;
2339   Mat                B = mat->B;
2340   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2341 
2342   PetscFunctionBegin;
2343   /* When a single process holds the entire A and the other processes have no entries */
2344   if (A->cmap->N == n) {
2345     PetscCall(VecGetArrayWrite(v, &diagA));
2346     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2347     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2348     PetscCall(VecDestroy(&diagV));
2349     PetscCall(VecRestoreArrayWrite(v, &diagA));
2350     PetscFunctionReturn(PETSC_SUCCESS);
2351   } else if (n == 0) {
2352     if (m) {
2353       PetscCall(VecGetArrayWrite(v, &a));
2354       for (r = 0; r < m; r++) {
2355         a[r] = PETSC_MAX_REAL;
2356         if (idx) idx[r] = -1;
2357       }
2358       PetscCall(VecRestoreArrayWrite(v, &a));
2359     }
2360     PetscFunctionReturn(PETSC_SUCCESS);
2361   }
2362 
2363   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2364   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2365   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2366   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2367 
2368   /* Get offdiagIdx[] for implicit 0.0 */
2369   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2370   ba = bav;
2371   bi = b->i;
2372   bj = b->j;
2373   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2374   for (r = 0; r < m; r++) {
2375     ncols = bi[r + 1] - bi[r];
2376     if (ncols == A->cmap->N - n) { /* Brow is dense */
2377       offdiagA[r]   = *ba;
2378       offdiagIdx[r] = cmap[0];
2379     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower */
2380       offdiagA[r] = 0.0;
2381 
2382       /* Find first hole in the cmap */
2383       for (j = 0; j < ncols; j++) {
2384         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2385         if (col > j && j < cstart) {
2386           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2387           break;
2388         } else if (col > j + n && j >= cstart) {
2389           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2390           break;
2391         }
2392       }
2393       if (j == ncols && ncols < A->cmap->N - n) {
2394         /* a hole is outside compressed Bcols */
2395         if (ncols == 0) {
2396           if (cstart) {
2397             offdiagIdx[r] = 0;
2398           } else offdiagIdx[r] = cend;
2399         } else { /* ncols > 0 */
2400           offdiagIdx[r] = cmap[ncols - 1] + 1;
2401           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2402         }
2403       }
2404     }
2405 
2406     for (j = 0; j < ncols; j++) {
2407       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2408         offdiagA[r]   = *ba;
2409         offdiagIdx[r] = cmap[*bj];
2410       }
2411       ba++;
2412       bj++;
2413     }
2414   }
2415 
2416   PetscCall(VecGetArrayWrite(v, &a));
2417   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2418   for (r = 0; r < m; ++r) {
2419     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2420       a[r] = diagA[r];
2421       if (idx) idx[r] = cstart + diagIdx[r];
2422     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2423       a[r] = diagA[r];
2424       if (idx) {
2425         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2426           idx[r] = cstart + diagIdx[r];
2427         } else idx[r] = offdiagIdx[r];
2428       }
2429     } else {
2430       a[r] = offdiagA[r];
2431       if (idx) idx[r] = offdiagIdx[r];
2432     }
2433   }
2434   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2435   PetscCall(VecRestoreArrayWrite(v, &a));
2436   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2437   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2438   PetscCall(VecDestroy(&diagV));
2439   PetscCall(VecDestroy(&offdiagV));
2440   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2441   PetscFunctionReturn(PETSC_SUCCESS);
2442 }
2443 
2444 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2445 {
2446   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2447   PetscInt           m = A->rmap->n, n = A->cmap->n;
2448   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2449   PetscInt          *cmap = mat->garray;
2450   PetscInt          *diagIdx, *offdiagIdx;
2451   Vec                diagV, offdiagV;
2452   PetscScalar       *a, *diagA, *offdiagA;
2453   const PetscScalar *ba, *bav;
2454   PetscInt           r, j, col, ncols, *bi, *bj;
2455   Mat                B = mat->B;
2456   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2457 
2458   PetscFunctionBegin;
2459   /* When a single process holds the entire A and the other processes have no entries */
2460   if (A->cmap->N == n) {
2461     PetscCall(VecGetArrayWrite(v, &diagA));
2462     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2463     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2464     PetscCall(VecDestroy(&diagV));
2465     PetscCall(VecRestoreArrayWrite(v, &diagA));
2466     PetscFunctionReturn(PETSC_SUCCESS);
2467   } else if (n == 0) {
2468     if (m) {
2469       PetscCall(VecGetArrayWrite(v, &a));
2470       for (r = 0; r < m; r++) {
2471         a[r] = PETSC_MIN_REAL;
2472         if (idx) idx[r] = -1;
2473       }
2474       PetscCall(VecRestoreArrayWrite(v, &a));
2475     }
2476     PetscFunctionReturn(PETSC_SUCCESS);
2477   }
2478 
2479   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2480   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2481   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2482   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2483 
2484   /* Get offdiagIdx[] for implicit 0.0 */
2485   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2486   ba = bav;
2487   bi = b->i;
2488   bj = b->j;
2489   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2490   for (r = 0; r < m; r++) {
2491     ncols = bi[r + 1] - bi[r];
2492     if (ncols == A->cmap->N - n) { /* Brow is dense */
2493       offdiagA[r]   = *ba;
2494       offdiagIdx[r] = cmap[0];
2495     } else { /* Brow is sparse, so we already KNOW the maximum is 0.0 or higher */
2496       offdiagA[r] = 0.0;
2497 
2498       /* Find first hole in the cmap */
2499       for (j = 0; j < ncols; j++) {
2500         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2501         if (col > j && j < cstart) {
2502           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2503           break;
2504         } else if (col > j + n && j >= cstart) {
2505           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2506           break;
2507         }
2508       }
2509       if (j == ncols && ncols < A->cmap->N - n) {
2510         /* a hole is outside compressed Bcols */
2511         if (ncols == 0) {
2512           if (cstart) {
2513             offdiagIdx[r] = 0;
2514           } else offdiagIdx[r] = cend;
2515         } else { /* ncols > 0 */
2516           offdiagIdx[r] = cmap[ncols - 1] + 1;
2517           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2518         }
2519       }
2520     }
2521 
2522     for (j = 0; j < ncols; j++) {
2523       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2524         offdiagA[r]   = *ba;
2525         offdiagIdx[r] = cmap[*bj];
2526       }
2527       ba++;
2528       bj++;
2529     }
2530   }
2531 
2532   PetscCall(VecGetArrayWrite(v, &a));
2533   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2534   for (r = 0; r < m; ++r) {
2535     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2536       a[r] = diagA[r];
2537       if (idx) idx[r] = cstart + diagIdx[r];
2538     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2539       a[r] = diagA[r];
2540       if (idx) {
2541         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2542           idx[r] = cstart + diagIdx[r];
2543         } else idx[r] = offdiagIdx[r];
2544       }
2545     } else {
2546       a[r] = offdiagA[r];
2547       if (idx) idx[r] = offdiagIdx[r];
2548     }
2549   }
2550   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2551   PetscCall(VecRestoreArrayWrite(v, &a));
2552   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2553   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2554   PetscCall(VecDestroy(&diagV));
2555   PetscCall(VecDestroy(&offdiagV));
2556   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2557   PetscFunctionReturn(PETSC_SUCCESS);
2558 }
2559 
2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2561 {
2562   Mat *dummy;
2563 
2564   PetscFunctionBegin;
2565   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2566   *newmat = *dummy;
2567   PetscCall(PetscFree(dummy));
2568   PetscFunctionReturn(PETSC_SUCCESS);
2569 }
2570 
2571 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2572 {
2573   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2574 
2575   PetscFunctionBegin;
2576   PetscCall(MatInvertBlockDiagonal(a->A, values));
2577   A->factorerrortype = a->A->factorerrortype;
2578   PetscFunctionReturn(PETSC_SUCCESS);
2579 }
2580 
2581 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2582 {
2583   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2584 
2585   PetscFunctionBegin;
2586   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2587   PetscCall(MatSetRandom(aij->A, rctx));
2588   if (x->assembled) {
2589     PetscCall(MatSetRandom(aij->B, rctx));
2590   } else {
2591     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2592   }
2593   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2594   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
2597 
2598 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2599 {
2600   PetscFunctionBegin;
2601   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2602   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2603   PetscFunctionReturn(PETSC_SUCCESS);
2604 }
2605 
2606 /*@
2607   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2608 
2609   Not Collective
2610 
2611   Input Parameter:
2612 . A - the matrix
2613 
2614   Output Parameter:
2615 . nz - the number of nonzeros stored on this MPI rank
2616 
2617   Level: advanced
2618 
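  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ`; the count covers both the diagonal and off-diagonal blocks owned by this rank.
.vb
  PetscCount nz;

  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros %" PetscInt_FMT "\n", (PetscInt)nz)); /* cast only for printing */
.ve
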
2619 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2620 @*/
2621 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2622 {
2623   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2624   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2625   PetscBool   isaij;
2626 
2627   PetscFunctionBegin;
2628   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2629   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2630   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2631   PetscFunctionReturn(PETSC_SUCCESS);
2632 }
2633 
2634 /*@
2635   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2636 
2637   Collective
2638 
2639   Input Parameters:
2640 + A  - the matrix
2641 - sc - `PETSC_TRUE` to use the scalable algorithm (the default is the non-scalable algorithm)
2642 
2643   Level: advanced
2644 
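  Options Database Key:
. -mat_increase_overlap_scalable - use the scalable algorithm in `MatIncreaseOverlap()` (see `MatSetFromOptions_MPIAIJ()` below)

  Example Usage:
  A minimal sketch; `is` is assumed to hold one index set to be enlarged by `MatIncreaseOverlap()`.
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
  PetscCall(MatIncreaseOverlap(A, 1, &is, 2)); /* overlap of 2, computed with the scalable algorithm */
.ve
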
2645 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2646 @*/
2647 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2648 {
2649   PetscFunctionBegin;
2650   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2651   PetscFunctionReturn(PETSC_SUCCESS);
2652 }
2653 
2654 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2655 {
2656   PetscBool sc = PETSC_FALSE, flg;
2657 
2658   PetscFunctionBegin;
2659   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2660   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2661   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2662   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2663   PetscOptionsHeadEnd();
2664   PetscFunctionReturn(PETSC_SUCCESS);
2665 }
2666 
2667 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2668 {
2669   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2670   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2671 
2672   PetscFunctionBegin;
2673   if (!Y->preallocated) {
2674     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2675   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2676     PetscInt nonew = aij->nonew;
2677     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2678     aij->nonew = nonew;
2679   }
2680   PetscCall(MatShift_Basic(Y, a));
2681   PetscFunctionReturn(PETSC_SUCCESS);
2682 }
2683 
2684 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2685 {
2686   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2687 
2688   PetscFunctionBegin;
2689   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2690   PetscCall(MatMissingDiagonal(a->A, missing, d));
2691   if (d) {
2692     PetscInt rstart;
2693     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2694     *d += rstart;
2695   }
2696   PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698 
2699 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2700 {
2701   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2702 
2703   PetscFunctionBegin;
2704   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2705   PetscFunctionReturn(PETSC_SUCCESS);
2706 }
2707 
2708 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2709 {
2710   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2711 
2712   PetscFunctionBegin;
2713   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2714   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2715   PetscFunctionReturn(PETSC_SUCCESS);
2716 }
2717 
2718 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2719                                        MatGetRow_MPIAIJ,
2720                                        MatRestoreRow_MPIAIJ,
2721                                        MatMult_MPIAIJ,
2722                                        /* 4*/ MatMultAdd_MPIAIJ,
2723                                        MatMultTranspose_MPIAIJ,
2724                                        MatMultTransposeAdd_MPIAIJ,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                        /*10*/ NULL,
2729                                        NULL,
2730                                        NULL,
2731                                        MatSOR_MPIAIJ,
2732                                        MatTranspose_MPIAIJ,
2733                                        /*15*/ MatGetInfo_MPIAIJ,
2734                                        MatEqual_MPIAIJ,
2735                                        MatGetDiagonal_MPIAIJ,
2736                                        MatDiagonalScale_MPIAIJ,
2737                                        MatNorm_MPIAIJ,
2738                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2739                                        MatAssemblyEnd_MPIAIJ,
2740                                        MatSetOption_MPIAIJ,
2741                                        MatZeroEntries_MPIAIJ,
2742                                        /*24*/ MatZeroRows_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        /*29*/ MatSetUp_MPI_Hash,
2748                                        NULL,
2749                                        NULL,
2750                                        MatGetDiagonalBlock_MPIAIJ,
2751                                        NULL,
2752                                        /*34*/ MatDuplicate_MPIAIJ,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        /*39*/ MatAXPY_MPIAIJ,
2758                                        MatCreateSubMatrices_MPIAIJ,
2759                                        MatIncreaseOverlap_MPIAIJ,
2760                                        MatGetValues_MPIAIJ,
2761                                        MatCopy_MPIAIJ,
2762                                        /*44*/ MatGetRowMax_MPIAIJ,
2763                                        MatScale_MPIAIJ,
2764                                        MatShift_MPIAIJ,
2765                                        MatDiagonalSet_MPIAIJ,
2766                                        MatZeroRowsColumns_MPIAIJ,
2767                                        /*49*/ MatSetRandom_MPIAIJ,
2768                                        MatGetRowIJ_MPIAIJ,
2769                                        MatRestoreRowIJ_MPIAIJ,
2770                                        NULL,
2771                                        NULL,
2772                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2773                                        NULL,
2774                                        MatSetUnfactored_MPIAIJ,
2775                                        MatPermute_MPIAIJ,
2776                                        NULL,
2777                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2778                                        MatDestroy_MPIAIJ,
2779                                        MatView_MPIAIJ,
2780                                        NULL,
2781                                        NULL,
2782                                        /*64*/ NULL,
2783                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2788                                        MatGetRowMinAbs_MPIAIJ,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*75*/ MatFDColoringApply_AIJ,
2794                                        MatSetFromOptions_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        MatFindZeroDiagonals_MPIAIJ,
2798                                        /*80*/ NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        /*83*/ MatLoad_MPIAIJ,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*89*/ NULL,
2808                                        NULL,
2809                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        NULL,
2816                                        MatBindToCPU_MPIAIJ,
2817                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        MatConjugate_MPIAIJ,
2821                                        NULL,
2822                                        /*104*/ MatSetValuesRow_MPIAIJ,
2823                                        MatRealPart_MPIAIJ,
2824                                        MatImaginaryPart_MPIAIJ,
2825                                        NULL,
2826                                        NULL,
2827                                        /*109*/ NULL,
2828                                        NULL,
2829                                        MatGetRowMin_MPIAIJ,
2830                                        NULL,
2831                                        MatMissingDiagonal_MPIAIJ,
2832                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2833                                        NULL,
2834                                        MatGetGhosts_MPIAIJ,
2835                                        NULL,
2836                                        NULL,
2837                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2838                                        NULL,
2839                                        NULL,
2840                                        NULL,
2841                                        MatGetMultiProcBlock_MPIAIJ,
2842                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2843                                        MatGetColumnReductions_MPIAIJ,
2844                                        MatInvertBlockDiagonal_MPIAIJ,
2845                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2846                                        MatCreateSubMatricesMPI_MPIAIJ,
2847                                        /*129*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2851                                        NULL,
2852                                        /*134*/ NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        NULL,
2857                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2858                                        NULL,
2859                                        NULL,
2860                                        MatFDColoringSetUp_MPIXAIJ,
2861                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2862                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2863                                        /*145*/ NULL,
2864                                        NULL,
2865                                        NULL,
2866                                        MatCreateGraph_Simple_AIJ,
2867                                        NULL,
2868                                        /*150*/ NULL,
2869                                        MatEliminateZeros_MPIAIJ,
2870                                        MatGetRowSumAbs_MPIAIJ,
2871                                        NULL,
2872                                        NULL,
2873                                        NULL};
2874 
2875 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2876 {
2877   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2878 
2879   PetscFunctionBegin;
2880   PetscCall(MatStoreValues(aij->A));
2881   PetscCall(MatStoreValues(aij->B));
2882   PetscFunctionReturn(PETSC_SUCCESS);
2883 }
2884 
2885 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2886 {
2887   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2888 
2889   PetscFunctionBegin;
2890   PetscCall(MatRetrieveValues(aij->A));
2891   PetscCall(MatRetrieveValues(aij->B));
2892   PetscFunctionReturn(PETSC_SUCCESS);
2893 }
2894 
2895 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2896 {
2897   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2898   PetscMPIInt size;
2899 
2900   PetscFunctionBegin;
2901   if (B->hash_active) {
2902     B->ops[0]      = b->cops;
2903     B->hash_active = PETSC_FALSE;
2904   }
2905   PetscCall(PetscLayoutSetUp(B->rmap));
2906   PetscCall(PetscLayoutSetUp(B->cmap));
2907 
2908 #if defined(PETSC_USE_CTABLE)
2909   PetscCall(PetscHMapIDestroy(&b->colmap));
2910 #else
2911   PetscCall(PetscFree(b->colmap));
2912 #endif
2913   PetscCall(PetscFree(b->garray));
2914   PetscCall(VecDestroy(&b->lvec));
2915   PetscCall(VecScatterDestroy(&b->Mvctx));
2916 
2917   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2918 
2919   MatSeqXAIJGetOptions_Private(b->B);
2920   PetscCall(MatDestroy(&b->B));
2921   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2922   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2923   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2924   PetscCall(MatSetType(b->B, MATSEQAIJ));
2925   MatSeqXAIJRestoreOptions_Private(b->B);
2926 
2927   MatSeqXAIJGetOptions_Private(b->A);
2928   PetscCall(MatDestroy(&b->A));
2929   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2930   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2931   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2932   PetscCall(MatSetType(b->A, MATSEQAIJ));
2933   MatSeqXAIJRestoreOptions_Private(b->A);
2934 
2935   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2936   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2937   B->preallocated  = PETSC_TRUE;
2938   B->was_assembled = PETSC_FALSE;
2939   B->assembled     = PETSC_FALSE;
2940   PetscFunctionReturn(PETSC_SUCCESS);
2941 }
2942 
2943 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2944 {
2945   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2946   PetscBool   ondiagreset, offdiagreset, memoryreset;
2947 
2948   PetscFunctionBegin;
2949   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2950   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2951   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2952 
2953   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2954   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2955   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
2956   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2957   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2958 
2959   PetscCall(PetscLayoutSetUp(B->rmap));
2960   PetscCall(PetscLayoutSetUp(B->cmap));
2961   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
2962   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2963   PetscCall(VecScatterDestroy(&b->Mvctx));
2964 
2965   B->preallocated  = PETSC_TRUE;
2966   B->was_assembled = PETSC_FALSE;
2967   B->assembled     = PETSC_FALSE;
2968   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2969   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2970   PetscFunctionReturn(PETSC_SUCCESS);
2971 }
2972 
2973 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2974 {
2975   Mat         mat;
2976   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2977 
2978   PetscFunctionBegin;
2979   *newmat = NULL;
2980   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2981   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2982   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2983   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2984   a = (Mat_MPIAIJ *)mat->data;
2985 
2986   mat->factortype = matin->factortype;
2987   mat->assembled  = matin->assembled;
2988   mat->insertmode = NOT_SET_VALUES;
2989 
2990   a->size         = oldmat->size;
2991   a->rank         = oldmat->rank;
2992   a->donotstash   = oldmat->donotstash;
2993   a->roworiented  = oldmat->roworiented;
2994   a->rowindices   = NULL;
2995   a->rowvalues    = NULL;
2996   a->getrowactive = PETSC_FALSE;
2997 
2998   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2999   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3000   if (matin->hash_active) {
3001     PetscCall(MatSetUp(mat));
3002   } else {
3003     mat->preallocated = matin->preallocated;
3004     if (oldmat->colmap) {
3005 #if defined(PETSC_USE_CTABLE)
3006       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3007 #else
3008       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3009       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3010 #endif
3011     } else a->colmap = NULL;
3012     if (oldmat->garray) {
3013       PetscInt len;
3014       len = oldmat->B->cmap->n;
3015       PetscCall(PetscMalloc1(len + 1, &a->garray));
3016       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3017     } else a->garray = NULL;
3018 
3019     /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3020       in fact, MatDuplicate() only requires the matrix to be preallocated.
3021       This may happen inside a DMCreateMatrix_Shell */
3022     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3023     if (oldmat->Mvctx) {
3024       a->Mvctx = oldmat->Mvctx;
3025       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3026     }
3027     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3028     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3029   }
3030   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3031   *newmat = mat;
3032   PetscFunctionReturn(PETSC_SUCCESS);
3033 }
3034 
3035 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3036 {
3037   PetscBool isbinary, ishdf5;
3038 
3039   PetscFunctionBegin;
3040   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3041   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3042   /* force binary viewer to load .info file if it has not yet done so */
3043   PetscCall(PetscViewerSetUp(viewer));
3044   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3045   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3046   if (isbinary) {
3047     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3048   } else if (ishdf5) {
3049 #if defined(PETSC_HAVE_HDF5)
3050     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3051 #else
3052     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3053 #endif
3054   } else {
3055     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3056   }
3057   PetscFunctionReturn(PETSC_SUCCESS);
3058 }
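
/* Example Usage: a minimal sketch (the file name is hypothetical); the viewer type
   selects which loader is dispatched above:

     PetscCall(PetscViewerBinaryOpen(comm, "A.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(comm, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/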
3059 
3060 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3061 {
3062   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3063   PetscInt    *rowidxs, *colidxs;
3064   PetscScalar *matvals;
3065 
3066   PetscFunctionBegin;
3067   PetscCall(PetscViewerSetUp(viewer));
3068 
3069   /* read in matrix header */
3070   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3071   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3072   M  = header[1];
3073   N  = header[2];
3074   nz = header[3];
3075   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3076   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3077   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
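
  /* Layout of the binary file consumed below (a sketch derived from the reads in this function):
       header[4]      - {MAT_FILE_CLASSID, M, N, nz}; nz may be PETSC_INT_MAX, in which case the
                        per-row nonzero sum check further below is skipped
       row lengths    - M PetscInt values, of which this rank reads its m local ones
       column indices - nz PetscInt values
       values         - nz PetscScalar values */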
3078 
3079   /* set block sizes from the viewer's .info file */
3080   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3081   /* set global sizes if not set already */
3082   if (mat->rmap->N < 0) mat->rmap->N = M;
3083   if (mat->cmap->N < 0) mat->cmap->N = N;
3084   PetscCall(PetscLayoutSetUp(mat->rmap));
3085   PetscCall(PetscLayoutSetUp(mat->cmap));
3086 
3087   /* check if the matrix sizes are correct */
3088   PetscCall(MatGetSize(mat, &rows, &cols));
3089   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3090 
3091   /* read in row lengths and build row indices */
3092   PetscCall(MatGetLocalSize(mat, &m, NULL));
3093   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3094   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3095   rowidxs[0] = 0;
3096   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3097   if (nz != PETSC_INT_MAX) {
3098     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3099     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3100   }
3101 
3102   /* read in column indices and matrix values */
3103   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3104   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3105   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3106   /* store matrix indices and values */
3107   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3108   PetscCall(PetscFree(rowidxs));
3109   PetscCall(PetscFree2(colidxs, matvals));
3110   PetscFunctionReturn(PETSC_SUCCESS);
3111 }
3112 
3113 /* Not scalable because of ISAllGather() unless getting all columns. */
3114 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3115 {
3116   IS          iscol_local;
3117   PetscBool   isstride;
3118   PetscMPIInt gisstride = 0;
3119 
3120   PetscFunctionBegin;
3121   /* check if we are grabbing all columns */
3122   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3123 
3124   if (isstride) {
3125     PetscInt start, len, mstart, mlen;
3126     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3127     PetscCall(ISGetLocalSize(iscol, &len));
3128     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3129     if (mstart == start && mlen - mstart == len) gisstride = 1;
3130   }
3131 
3132   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3133   if (gisstride) {
3134     PetscInt N;
3135     PetscCall(MatGetSize(mat, NULL, &N));
3136     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3137     PetscCall(ISSetIdentity(iscol_local));
3138     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3139   } else {
3140     PetscInt cbs;
3141     PetscCall(ISGetBlockSize(iscol, &cbs));
3142     PetscCall(ISAllGather(iscol, &iscol_local));
3143     PetscCall(ISSetBlockSize(iscol_local, cbs));
3144   }
3145 
3146   *isseq = iscol_local;
3147   PetscFunctionReturn(PETSC_SUCCESS);
3148 }
3149 
3150 /*
3151  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3152  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3153 
3154  Input Parameters:
3155 +   mat - matrix
3156 .   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3157            i.e., mat->rstart <= isrow[i] < mat->rend
3158 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3159            i.e., mat->cstart <= iscol[i] < mat->cend
3160 
3161  Output Parameters:
3162 +   isrow_d - sequential row index set for retrieving mat->A
3163 .   iscol_d - sequential column index set for retrieving mat->A
3164 .   iscol_o - sequential column index set for retrieving mat->B
3165 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
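
 Example (an illustrative sketch, not taken from a test): with 2 ranks each owning 2 rows and
 columns of a 4x4 `mat`, suppose iscol = {0} on rank 0 and iscol = {3} on rank 1. On rank 0,
 iscol_d = {0} (the local index of column 0); if column 3 appears in rank 0's off-diagonal
 block B, then iscol_o holds its local index in B's compressed numbering and the corresponding
 garray entry is 1, the position of column 3 in the concatenated iscol = {0, 3}.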
3166  */
3167 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3168 {
3169   Vec             x, cmap;
3170   const PetscInt *is_idx;
3171   PetscScalar    *xarray, *cmaparray;
3172   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3173   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3174   Mat             B    = a->B;
3175   Vec             lvec = a->lvec, lcmap;
3176   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3177   MPI_Comm        comm;
3178   VecScatter      Mvctx = a->Mvctx;
3179 
3180   PetscFunctionBegin;
3181   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3182   PetscCall(ISGetLocalSize(iscol, &ncols));
3183 
3184   /* (1) iscol selects a subset of the columns of mat; mark the remaining entries with -1 to form a full vector x */
3185   PetscCall(MatCreateVecs(mat, &x, NULL));
3186   PetscCall(VecSet(x, -1.0));
3187   PetscCall(VecDuplicate(x, &cmap));
3188   PetscCall(VecSet(cmap, -1.0));
3189 
3190   /* Get start indices */
3191   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3192   isstart -= ncols;
3193   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3194 
3195   PetscCall(ISGetIndices(iscol, &is_idx));
3196   PetscCall(VecGetArray(x, &xarray));
3197   PetscCall(VecGetArray(cmap, &cmaparray));
3198   PetscCall(PetscMalloc1(ncols, &idx));
3199   for (i = 0; i < ncols; i++) {
3200     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3201     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3202     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3203   }
3204   PetscCall(VecRestoreArray(x, &xarray));
3205   PetscCall(VecRestoreArray(cmap, &cmaparray));
3206   PetscCall(ISRestoreIndices(iscol, &is_idx));
3207 
3208   /* Get iscol_d */
3209   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3210   PetscCall(ISGetBlockSize(iscol, &i));
3211   PetscCall(ISSetBlockSize(*iscol_d, i));
3212 
3213   /* Get isrow_d */
3214   PetscCall(ISGetLocalSize(isrow, &m));
3215   rstart = mat->rmap->rstart;
3216   PetscCall(PetscMalloc1(m, &idx));
3217   PetscCall(ISGetIndices(isrow, &is_idx));
3218   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3219   PetscCall(ISRestoreIndices(isrow, &is_idx));
3220 
3221   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3222   PetscCall(ISGetBlockSize(isrow, &i));
3223   PetscCall(ISSetBlockSize(*isrow_d, i));
3224 
3225   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3226   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3227   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3228 
3229   PetscCall(VecDuplicate(lvec, &lcmap));
3230 
3231   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3232   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3233 
3234   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3235   /* off-process column indices */
3236   count = 0;
3237   PetscCall(PetscMalloc1(Bn, &idx));
3238   PetscCall(PetscMalloc1(Bn, &cmap1));
3239 
3240   PetscCall(VecGetArray(lvec, &xarray));
3241   PetscCall(VecGetArray(lcmap, &cmaparray));
3242   for (i = 0; i < Bn; i++) {
3243     if (PetscRealPart(xarray[i]) > -1.0) {
3244       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3245       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3246       count++;
3247     }
3248   }
3249   PetscCall(VecRestoreArray(lvec, &xarray));
3250   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3251 
3252   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3253   /* cannot ensure iscol_o has same blocksize as iscol! */
3254 
3255   PetscCall(PetscFree(idx));
3256   *garray = cmap1;
3257 
3258   PetscCall(VecDestroy(&x));
3259   PetscCall(VecDestroy(&cmap));
3260   PetscCall(VecDestroy(&lcmap));
3261   PetscFunctionReturn(PETSC_SUCCESS);
3262 }
3263 
3264 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3265 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3266 {
3267   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3268   Mat         M = NULL;
3269   MPI_Comm    comm;
3270   IS          iscol_d, isrow_d, iscol_o;
3271   Mat         Asub = NULL, Bsub = NULL;
3272   PetscInt    n;
3273 
3274   PetscFunctionBegin;
3275   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3276 
3277   if (call == MAT_REUSE_MATRIX) {
3278     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3279     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3280     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3281 
3282     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3283     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3284 
3285     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3286     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3287 
3288     /* Update diagonal and off-diagonal portions of submat */
3289     asub = (Mat_MPIAIJ *)(*submat)->data;
3290     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3291     PetscCall(ISGetLocalSize(iscol_o, &n));
3292     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3293     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3294     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3295 
3296   } else { /* call == MAT_INITIAL_MATRIX) */
3297     const PetscInt *garray;
3298     PetscInt        BsubN;
3299 
3300     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3301     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3302 
3303     /* Create local submatrices Asub and Bsub */
3304     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3305     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3306 
3307     /* Create submatrix M */
3308     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3309 
3310     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3311     asub = (Mat_MPIAIJ *)M->data;
3312 
3313     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3314     n = asub->B->cmap->N;
3315     if (BsubN > n) {
3316       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3317       const PetscInt *idx;
3318       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3319       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3320 
3321       PetscCall(PetscMalloc1(n, &idx_new));
3322       j = 0;
3323       PetscCall(ISGetIndices(iscol_o, &idx));
3324       for (i = 0; i < n; i++) {
3325         if (j >= BsubN) break;
3326         while (subgarray[i] > garray[j]) j++;
3327 
3328         if (subgarray[i] == garray[j]) {
3329           idx_new[i] = idx[j++];
3330         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3331       }
3332       PetscCall(ISRestoreIndices(iscol_o, &idx));
3333 
3334       PetscCall(ISDestroy(&iscol_o));
3335       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3336 
3337     } else if (BsubN < n) {
3338       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3339     }
3340 
3341     PetscCall(PetscFree(garray));
3342     *submat = M;
3343 
3344     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3345     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3346     PetscCall(ISDestroy(&isrow_d));
3347 
3348     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3349     PetscCall(ISDestroy(&iscol_d));
3350 
3351     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3352     PetscCall(ISDestroy(&iscol_o));
3353   }
3354   PetscFunctionReturn(PETSC_SUCCESS);
3355 }
3356 
3357 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3358 {
3359   IS        iscol_local = NULL, isrow_d;
3360   PetscInt  csize;
3361   PetscInt  n, i, j, start, end;
3362   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3363   MPI_Comm  comm;
3364 
3365   PetscFunctionBegin;
3366   /* If isrow has same processor distribution as mat,
3367      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3368   if (call == MAT_REUSE_MATRIX) {
3369     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3370     if (isrow_d) {
3371       sameRowDist  = PETSC_TRUE;
3372       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3373     } else {
3374       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3375       if (iscol_local) {
3376         sameRowDist  = PETSC_TRUE;
3377         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3378       }
3379     }
3380   } else {
3381     /* Check if isrow has same processor distribution as mat */
3382     sameDist[0] = PETSC_FALSE;
3383     PetscCall(ISGetLocalSize(isrow, &n));
3384     if (!n) {
3385       sameDist[0] = PETSC_TRUE;
3386     } else {
3387       PetscCall(ISGetMinMax(isrow, &i, &j));
3388       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3389       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3390     }
3391 
3392     /* Check if iscol has same processor distribution as mat */
3393     sameDist[1] = PETSC_FALSE;
3394     PetscCall(ISGetLocalSize(iscol, &n));
3395     if (!n) {
3396       sameDist[1] = PETSC_TRUE;
3397     } else {
3398       PetscCall(ISGetMinMax(iscol, &i, &j));
3399       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3400       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3401     }
3402 
3403     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3404     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3405     sameRowDist = tsameDist[0];
3406   }
3407 
3408   if (sameRowDist) {
3409     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3410       /* isrow and iscol have same processor distribution as mat */
3411       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3412       PetscFunctionReturn(PETSC_SUCCESS);
3413     } else { /* sameRowDist */
3414       /* isrow has same processor distribution as mat */
3415       if (call == MAT_INITIAL_MATRIX) {
3416         PetscBool sorted;
3417         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3418         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3419         PetscCall(ISGetSize(iscol, &i));
3420         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3421 
3422         PetscCall(ISSorted(iscol_local, &sorted));
3423         if (sorted) {
3424           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3425           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3426           PetscFunctionReturn(PETSC_SUCCESS);
3427         }
3428       } else { /* call == MAT_REUSE_MATRIX */
3429         IS iscol_sub;
3430         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3431         if (iscol_sub) {
3432           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3433           PetscFunctionReturn(PETSC_SUCCESS);
3434         }
3435       }
3436     }
3437   }
3438 
3439   /* General case: iscol -> iscol_local which has global size of iscol */
3440   if (call == MAT_REUSE_MATRIX) {
3441     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3442     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3443   } else {
3444     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3445   }
3446 
3447   PetscCall(ISGetLocalSize(iscol, &csize));
3448   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3449 
3450   if (call == MAT_INITIAL_MATRIX) {
3451     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3452     PetscCall(ISDestroy(&iscol_local));
3453   }
3454   PetscFunctionReturn(PETSC_SUCCESS);
3455 }
3456 
3457 /*@C
3458   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3459   and "off-diagonal" part of the matrix in CSR format.
3460 
3461   Collective
3462 
3463   Input Parameters:
3464 + comm   - MPI communicator
3465 . A      - "diagonal" portion of matrix
3466 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3467 - garray - global index of `B` columns
3468 
3469   Output Parameter:
3470 . mat - the matrix, with input `A` as its local diagonal matrix
3471 
3472   Level: advanced
3473 
3474   Notes:
3475   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3476 
3477   `A` becomes part of the output `mat`, while `B` is destroyed by this routine; the caller must not use `A` or `B` afterwards.
3478 
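  Example Usage:
  A minimal per-rank sketch; m, n, k, and garray are assumed to be set up consistently across ranks,
  with k the number of nonempty off-diagonal columns and garray[] their global indices in increasing order.
.vb
  Mat A, B, C;

  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, m, n, 2, NULL, &A)); /* "diagonal" block */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, m, k, 1, NULL, &B)); /* compressed "off-diagonal" block */
  /* ... fill A and B with MatSetValues() and assemble both ... */
  PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, A, B, garray, &C)); /* A and B now belong to C */
.ve
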
3479 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3480 @*/
3481 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3482 {
3483   Mat_MPIAIJ        *maij;
3484   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3485   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3486   const PetscScalar *oa;
3487   Mat                Bnew;
3488   PetscInt           m, n, N;
3489   MatType            mpi_mat_type;
3490 
3491   PetscFunctionBegin;
3492   PetscCall(MatCreate(comm, mat));
3493   PetscCall(MatGetSize(A, &m, &n));
3494   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3495   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3496   /* the check below was removed; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3497   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3498 
3499   /* Get global columns of mat */
3500   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3501 
3502   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3503   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3504   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3505   PetscCall(MatSetType(*mat, mpi_mat_type));
3506 
3507   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3508   maij = (Mat_MPIAIJ *)(*mat)->data;
3509 
3510   (*mat)->preallocated = PETSC_TRUE;
3511 
3512   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3513   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3514 
3515   /* Set A as diagonal portion of *mat */
3516   maij->A = A;
3517 
3518   nz = oi[m];
3519   for (i = 0; i < nz; i++) {
3520     col   = oj[i];
3521     oj[i] = garray[col];
3522   }
3523 
3524   /* Set Bnew as off-diagonal portion of *mat */
3525   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3526   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3527   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3528   bnew        = (Mat_SeqAIJ *)Bnew->data;
3529   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3530   maij->B     = Bnew;
3531 
3532   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3533 
3534   b->free_a  = PETSC_FALSE;
3535   b->free_ij = PETSC_FALSE;
3536   PetscCall(MatDestroy(&B));
3537 
3538   bnew->free_a  = PETSC_TRUE;
3539   bnew->free_ij = PETSC_TRUE;
3540 
3541   /* condense columns of maij->B */
3542   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3543   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3544   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3545   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3546   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3547   PetscFunctionReturn(PETSC_SUCCESS);
3548 }
3549 
3550 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3551 
3552 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3553 {
3554   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3555   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3556   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3557   Mat             M, Msub, B = a->B;
3558   MatScalar      *aa;
3559   Mat_SeqAIJ     *aij;
3560   PetscInt       *garray = a->garray, *colsub, Ncols;
3561   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3562   IS              iscol_sub, iscmap;
3563   const PetscInt *is_idx, *cmap;
3564   PetscBool       allcolumns = PETSC_FALSE;
3565   MPI_Comm        comm;
3566 
3567   PetscFunctionBegin;
3568   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3569   if (call == MAT_REUSE_MATRIX) {
3570     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3571     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3572     PetscCall(ISGetLocalSize(iscol_sub, &count));
3573 
3574     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3575     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3576 
3577     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3578     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3579 
3580     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3581 
3582   } else { /* call == MAT_INITIAL_MATRIX */
3583     PetscBool flg;
3584 
3585     PetscCall(ISGetLocalSize(iscol, &n));
3586     PetscCall(ISGetSize(iscol, &Ncols));
3587 
3588     /* (1) iscol -> nonscalable iscol_local */
3589     /* Check for special case: each processor gets entire matrix columns */
3590     PetscCall(ISIdentity(iscol_local, &flg));
3591     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3592     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3593     if (allcolumns) {
3594       iscol_sub = iscol_local;
3595       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3596       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3597 
3598     } else {
3599       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3600       PetscInt *idx, *cmap1, k;
3601       PetscCall(PetscMalloc1(Ncols, &idx));
3602       PetscCall(PetscMalloc1(Ncols, &cmap1));
3603       PetscCall(ISGetIndices(iscol_local, &is_idx));
3604       count = 0;
3605       k     = 0;
3606       for (i = 0; i < Ncols; i++) {
3607         j = is_idx[i];
3608         if (j >= cstart && j < cend) {
3609           /* diagonal part of mat */
3610           idx[count]     = j;
3611           cmap1[count++] = i; /* column index in submat */
3612         } else if (Bn) {
3613           /* off-diagonal part of mat */
3614           if (j == garray[k]) {
3615             idx[count]     = j;
3616             cmap1[count++] = i; /* column index in submat */
3617           } else if (j > garray[k]) {
3618             while (j > garray[k] && k < Bn - 1) k++;
3619             if (j == garray[k]) {
3620               idx[count]     = j;
3621               cmap1[count++] = i; /* column index in submat */
3622             }
3623           }
3624         }
3625       }
3626       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3627 
3628       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3629       PetscCall(ISGetBlockSize(iscol, &cbs));
3630       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3631 
3632       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3633     }
3634 
3635     /* (3) Create sequential Msub */
3636     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3637   }
3638 
3639   PetscCall(ISGetLocalSize(iscol_sub, &count));
3640   aij = (Mat_SeqAIJ *)Msub->data;
3641   ii  = aij->i;
3642   PetscCall(ISGetIndices(iscmap, &cmap));
3643 
3644   /*
3645       m - number of local rows
3646       Ncols - number of columns (same on all processors)
3647       rstart - first row in new global matrix generated
3648   */
3649   PetscCall(MatGetSize(Msub, &m, NULL));
3650 
3651   if (call == MAT_INITIAL_MATRIX) {
3652     /* (4) Create parallel newmat */
3653     PetscMPIInt rank, size;
3654     PetscInt    csize;
3655 
3656     PetscCallMPI(MPI_Comm_size(comm, &size));
3657     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3658 
3659     /*
3660         Determine the number of non-zeros in the diagonal and off-diagonal
3661         portions of the matrix in order to do correct preallocation
3662     */
3663 
3664     /* first get start and end of "diagonal" columns */
3665     PetscCall(ISGetLocalSize(iscol, &csize));
3666     if (csize == PETSC_DECIDE) {
3667       PetscCall(ISGetSize(isrow, &mglobal));
3668       if (mglobal == Ncols) { /* square matrix */
3669         nlocal = m;
3670       } else {
3671         nlocal = Ncols / size + ((Ncols % size) > rank);
3672       }
3673     } else {
3674       nlocal = csize;
3675     }
3676     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3677     rstart = rend - nlocal;
3678     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3679 
3680     /* next, compute all the lengths */
3681     jj = aij->j;
3682     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3683     olens = dlens + m;
3684     for (i = 0; i < m; i++) {
3685       jend = ii[i + 1] - ii[i];
3686       olen = 0;
3687       dlen = 0;
3688       for (j = 0; j < jend; j++) {
3689         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3690         else dlen++;
3691         jj++;
3692       }
3693       olens[i] = olen;
3694       dlens[i] = dlen;
3695     }
3696 
3697     PetscCall(ISGetBlockSize(isrow, &bs));
3698     PetscCall(ISGetBlockSize(iscol, &cbs));
3699 
3700     PetscCall(MatCreate(comm, &M));
3701     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3702     PetscCall(MatSetBlockSizes(M, bs, cbs));
3703     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3704     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3705     PetscCall(PetscFree(dlens));
3706 
3707   } else { /* call == MAT_REUSE_MATRIX */
3708     M = *newmat;
3709     PetscCall(MatGetLocalSize(M, &i, NULL));
3710     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3711     PetscCall(MatZeroEntries(M));
3712     /*
3713          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3714        rather than the slower MatSetValues().
3715     */
3716     M->was_assembled = PETSC_TRUE;
3717     M->assembled     = PETSC_FALSE;
3718   }
3719 
3720   /* (5) Set values of Msub to *newmat */
3721   PetscCall(PetscMalloc1(count, &colsub));
3722   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3723 
3724   jj = aij->j;
3725   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3726   for (i = 0; i < m; i++) {
3727     row = rstart + i;
3728     nz  = ii[i + 1] - ii[i];
3729     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3730     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3731     jj += nz;
3732     aa += nz;
3733   }
3734   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3735   PetscCall(ISRestoreIndices(iscmap, &cmap));
3736 
3737   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3738   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3739 
3740   PetscCall(PetscFree(colsub));
3741 
3742   /* save Msub, iscol_sub and iscmap used in processor for next request */
3743   if (call == MAT_INITIAL_MATRIX) {
3744     *newmat = M;
3745     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3746     PetscCall(MatDestroy(&Msub));
3747 
3748     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3749     PetscCall(ISDestroy(&iscol_sub));
3750 
3751     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3752     PetscCall(ISDestroy(&iscmap));
3753 
3754     if (iscol_local) {
3755       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3756       PetscCall(ISDestroy(&iscol_local));
3757     }
3758   }
3759   PetscFunctionReturn(PETSC_SUCCESS);
3760 }
3761 
3762 /*
3763     Not great since it makes two copies of the submatrix: first a SeqAIJ
3764   locally, and then the end result by concatenating the local matrices.
3765   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3766 
3767   This requires a sequential iscol with all indices.
3768 */
3769 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3770 {
3771   PetscMPIInt rank, size;
3772   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3773   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3774   Mat         M, Mreuse;
3775   MatScalar  *aa, *vwork;
3776   MPI_Comm    comm;
3777   Mat_SeqAIJ *aij;
3778   PetscBool   colflag, allcolumns = PETSC_FALSE;
3779 
3780   PetscFunctionBegin;
3781   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3782   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3783   PetscCallMPI(MPI_Comm_size(comm, &size));
3784 
3785   /* Check for special case: each processor gets entire matrix columns */
3786   PetscCall(ISIdentity(iscol, &colflag));
3787   PetscCall(ISGetLocalSize(iscol, &n));
3788   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3789   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3790 
3791   if (call == MAT_REUSE_MATRIX) {
3792     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3793     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3794     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3795   } else {
3796     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3797   }
3798 
3799   /*
3800       m - number of local rows
3801       n - number of columns (same on all processors)
3802       rstart - first row in new global matrix generated
3803   */
3804   PetscCall(MatGetSize(Mreuse, &m, &n));
3805   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3806   if (call == MAT_INITIAL_MATRIX) {
3807     aij = (Mat_SeqAIJ *)Mreuse->data;
3808     ii  = aij->i;
3809     jj  = aij->j;
3810 
3811     /*
3812         Determine the number of non-zeros in the diagonal and off-diagonal
3813         portions of the matrix in order to do correct preallocation
3814     */
3815 
3816     /* first get start and end of "diagonal" columns */
3817     if (csize == PETSC_DECIDE) {
3818       PetscCall(ISGetSize(isrow, &mglobal));
3819       if (mglobal == n) { /* square matrix */
3820         nlocal = m;
3821       } else {
3822         nlocal = n / size + ((n % size) > rank);
3823       }
3824     } else {
3825       nlocal = csize;
3826     }
3827     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3828     rstart = rend - nlocal;
3829     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3830 
3831     /* next, compute all the lengths */
3832     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3833     olens = dlens + m;
3834     for (i = 0; i < m; i++) {
3835       jend = ii[i + 1] - ii[i];
3836       olen = 0;
3837       dlen = 0;
3838       for (j = 0; j < jend; j++) {
3839         if (*jj < rstart || *jj >= rend) olen++;
3840         else dlen++;
3841         jj++;
3842       }
3843       olens[i] = olen;
3844       dlens[i] = dlen;
3845     }
3846     PetscCall(MatCreate(comm, &M));
3847     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3848     PetscCall(MatSetBlockSizes(M, bs, cbs));
3849     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3850     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3851     PetscCall(PetscFree(dlens));
3852   } else {
3853     PetscInt ml, nl;
3854 
3855     M = *newmat;
3856     PetscCall(MatGetLocalSize(M, &ml, &nl));
3857     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3858     PetscCall(MatZeroEntries(M));
3859     /*
3860          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3861        rather than the slower MatSetValues().
3862     */
3863     M->was_assembled = PETSC_TRUE;
3864     M->assembled     = PETSC_FALSE;
3865   }
3866   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3867   aij = (Mat_SeqAIJ *)Mreuse->data;
3868   ii  = aij->i;
3869   jj  = aij->j;
3870 
3871   /* trigger copy to CPU if needed */
3872   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3873   for (i = 0; i < m; i++) {
3874     row   = rstart + i;
3875     nz    = ii[i + 1] - ii[i];
3876     cwork = jj;
3877     jj    = PetscSafePointerPlusOffset(jj, nz);
3878     vwork = aa;
3879     aa    = PetscSafePointerPlusOffset(aa, nz);
3880     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3881   }
3882   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3883 
3884   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3885   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3886   *newmat = M;
3887 
3888   /* save the submatrix on this process for a later MAT_REUSE_MATRIX request */
3889   if (call == MAT_INITIAL_MATRIX) {
3890     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3891     PetscCall(MatDestroy(&Mreuse));
3892   }
3893   PetscFunctionReturn(PETSC_SUCCESS);
3894 }
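/*
   A minimal usage sketch (not part of the library): the routine above is reached
   through the public MatCreateSubMatrix() interface. The index sets here are
   placeholders; iscol must be a sequential IS listing all requested columns.

     Mat A, Asub;
     IS  isrow, iscol;
     // ... assemble the MATMPIAIJ matrix A and build isrow/iscol ...
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &Asub));
     // with unchanged index sets and layout, a later call can reuse the cached data
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &Asub));
     PetscCall(MatDestroy(&Asub));
*/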
3895 
3896 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3897 {
3898   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3899   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3900   const PetscInt *JJ;
3901   PetscBool       nooffprocentries;
3902   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3903 
3904   PetscFunctionBegin;
3905   PetscCall(PetscLayoutSetUp(B->rmap));
3906   PetscCall(PetscLayoutSetUp(B->cmap));
3907   m       = B->rmap->n;
3908   cstart  = B->cmap->rstart;
3909   cend    = B->cmap->rend;
3910   rstart  = B->rmap->rstart;
3911   irstart = Ii[0];
3912 
3913   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3914 
3915   if (PetscDefined(USE_DEBUG)) {
3916     for (i = 0; i < m; i++) {
3917       nnz = Ii[i + 1] - Ii[i];
3918       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3919       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nnz);
3920       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3921       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3922     }
3923   }
3924 
3925   for (i = 0; i < m; i++) {
3926     nnz     = Ii[i + 1] - Ii[i];
3927     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3928     nnz_max = PetscMax(nnz_max, nnz);
3929     d       = 0;
3930     for (j = 0; j < nnz; j++) {
3931       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3932     }
3933     d_nnz[i] = d;
3934     o_nnz[i] = nnz - d;
3935   }
3936   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3937   PetscCall(PetscFree2(d_nnz, o_nnz));
3938 
3939   for (i = 0; i < m; i++) {
3940     ii = i + rstart;
3941     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3942   }
3943   nooffprocentries    = B->nooffprocentries;
3944   B->nooffprocentries = PETSC_TRUE;
3945   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3946   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3947   B->nooffprocentries = nooffprocentries;
3948 
3949   /* for each local row, count the number of entries stored to the left of the diagonal block; MatUpdateMPIAIJWithArray() relies on these counts */
3950   PetscCall(PetscFree(Aij->ld));
3951   PetscCall(PetscCalloc1(m, &ld));
3952   Aij->ld = ld;
3953   for (i = 0; i < m; i++) {
3954     nnz = Ii[i + 1] - Ii[i];
3955     j   = 0;
3956     while (j < nnz && J[j] < cstart) j++;
3957     ld[i] = j;
3958     if (J) J += nnz;
3959   }
3960 
3961   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3962   PetscFunctionReturn(PETSC_SUCCESS);
3963 }
3964 
3965 /*@
3966   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3967   (the default parallel PETSc format).
3968 
3969   Collective
3970 
3971   Input Parameters:
3972 + B - the matrix
3973 . i - the indices into `j` for the start of each local row (indices start with zero)
3974 . j - the column indices for each local row (indices start with zero)
3975 - v - optional values in the matrix
3976 
3977   Level: developer
3978 
3979   Notes:
3980   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3981   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3982   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3983 
3984   The `i` and `j` indices are 0 based, and the entries of `i` are offsets into the local `j` array.
3985 
3986   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3987 
3988   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3989 
3990   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3991   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3992 
3993   The format used for the sparse matrix input is equivalent to a
3994   row-major ordering, i.e., for the following matrix, the input data expected is
3995   as shown
3996 .vb
3997         1 0 0
3998         2 0 3     P0
3999        -------
4000         4 5 6     P1
4001 
4002      Process0 [P0] rows_owned=[0,1]
4003         i =  {0,1,3}  [size = nrow+1  = 2+1]
4004         j =  {0,0,2}  [size = 3]
4005         v =  {1,2,3}  [size = 3]
4006 
4007      Process1 [P1] rows_owned=[2]
4008         i =  {0,3}    [size = nrow+1  = 1+1]
4009         j =  {0,1,2}  [size = 3]
4010         v =  {4,5,6}  [size = 3]
4011 .ve
4012 
4013 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4014           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4015 @*/
4016 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4017 {
4018   PetscFunctionBegin;
4019   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4020   PetscFunctionReturn(PETSC_SUCCESS);
4021 }
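/*
   A minimal sketch of calling MatMPIAIJSetPreallocationCSR(), using the two-process
   example from the manual page above as seen from process 0 (which owns rows 0-1 of
   the 3x3 matrix); process 1 would pass its own arrays analogously. Per the
   implementation above, the call preallocates, inserts the values, and assembles.

     Mat         B;
     PetscInt    ia[] = {0, 1, 3};        // row offsets for the 2 local rows
     PetscInt    ja[] = {0, 0, 2};        // global column indices
     PetscScalar va[] = {1.0, 2.0, 3.0};  // values, row by row
     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetSizes(B, 2, PETSC_DECIDE, 3, 3));
     PetscCall(MatSetType(B, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocationCSR(B, ia, ja, va));
*/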
4022 
4023 /*@
4024   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4025   (the default parallel PETSc format).  For good matrix assembly performance
4026   the user should preallocate the matrix storage by setting the parameters
4027   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4028 
4029   Collective
4030 
4031   Input Parameters:
4032 + B     - the matrix
4033 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4034            (same value is used for all local rows)
4035 . d_nnz - array containing the number of nonzeros in the various rows of the
4036            DIAGONAL portion of the local submatrix (possibly different for each row)
4037            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4038            The size of this array is equal to the number of local rows, i.e., `m`.
4039            For matrices that will be factored, you must leave room for (and set)
4040            the diagonal entry even if it is zero.
4041 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4042            submatrix (same value is used for all local rows).
4043 - o_nnz - array containing the number of nonzeros in the various rows of the
4044            OFF-DIAGONAL portion of the local submatrix (possibly different for
4045            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4046            structure. The size of this array is equal to the number
4047            of local rows, i.e., `m`.
4048 
4049   Example Usage:
4050   Consider the following 8x8 matrix with 34 non-zero values that is
4051   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4052   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4053   as follows
4054 
4055 .vb
4056             1  2  0  |  0  3  0  |  0  4
4057     Proc0   0  5  6  |  7  0  0  |  8  0
4058             9  0 10  | 11  0  0  | 12  0
4059     -------------------------------------
4060            13  0 14  | 15 16 17  |  0  0
4061     Proc1   0 18  0  | 19 20 21  |  0  0
4062             0  0  0  | 22 23  0  | 24  0
4063     -------------------------------------
4064     Proc2  25 26 27  |  0  0 28  | 29  0
4065            30  0  0  | 31 32 33  |  0 34
4066 .ve
4067 
4068   This can be represented as a collection of submatrices as
4069 .vb
4070       A B C
4071       D E F
4072       G H I
4073 .ve
4074 
4075   Where the submatrices A,B,C are owned by proc0, D,E,F are
4076   owned by proc1, G,H,I are owned by proc2.
4077 
4078   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4079   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4080   The 'M','N' parameters are 8,8, and have the same values on all procs.
4081 
4082   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4083   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4084   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4085   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4086   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4087   matrix, and [DF] as another `MATSEQAIJ` matrix.
4088 
4089   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4090   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4091   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4092   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4093   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4094   In this case, the values of `d_nz`, `o_nz` are
4095 .vb
4096      proc0  dnz = 2, o_nz = 2
4097      proc1  dnz = 3, o_nz = 2
4098      proc2  dnz = 1, o_nz = 4
4099 .ve
4100   We are allocating `m`*(`d_nz`+`o_nz`) storage locations on every proc. This
4101   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4102   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4103   34 values.
4104 
4105   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4106   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4107   In the above case the values for `d_nnz`, `o_nnz` are
4108 .vb
4109      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4110      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4111      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4112 .ve
4113   Here the space allocated is the sum of all the above values, i.e., 34, and
4114   hence the preallocation is perfect.
4115 
4116   Level: intermediate
4117 
4118   Notes:
4119   If the *_nnz parameter is given then the *_nz parameter is ignored
4120 
4121   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4122   storage.  The stored row and column indices begin with zero.
4123   See [Sparse Matrices](sec_matsparse) for details.
4124 
4125   The parallel matrix is partitioned such that the first m0 rows belong to
4126   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4127   to process 2, etc., where m0,m1,m2... are the input parameter `m`.
4128 
4129   The DIAGONAL portion of the local submatrix of a processor can be defined
4130   as the submatrix obtained by extracting the part corresponding to
4131   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4132   first row that belongs to the processor, r2 is the last row belonging to
4133   this processor, and c1-c2 is the range of indices of the local part of a
4134   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4135   common case of a square matrix, the row and column ranges are the same and
4136   the DIAGONAL part is also square. The remaining portion of the local
4137   submatrix, of size m x (N-n), constitutes the OFF-DIAGONAL portion.
4138 
4139   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4140 
4141   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4142   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4143   You can also run with the option `-info` and look for messages with the string
4144   malloc in them to see if additional memory allocation was needed.
4145 
4146 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4147           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4148 @*/
4149 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4150 {
4151   PetscFunctionBegin;
4152   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4153   PetscValidType(B, 1);
4154   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4155   PetscFunctionReturn(PETSC_SUCCESS);
4156 }
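/*
   A minimal sketch of the per-row preallocation variant, using the d_nnz/o_nnz
   values computed for proc0 in the 8x8 example above; the other two ranks would
   pass their own arrays from the same example.

     Mat      B;
     PetscInt d_nnz[] = {2, 2, 2}; // diagonal-block nonzeros per local row (proc0)
     PetscInt o_nnz[] = {2, 2, 2}; // off-diagonal-block nonzeros per local row (proc0)
     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetSizes(B, 3, 3, 8, 8));
     PetscCall(MatSetType(B, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
*/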
4157 
4158 /*@
4159   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4160   CSR format.
4161 
4162   Collective
4163 
4164   Input Parameters:
4165 + comm - MPI communicator
4166 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4167 . n    - This value should be the same as the local size used in creating the
4168          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
4169          calculated if `N` is given). For square matrices `n` is almost always `m`.
4170 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
4171 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
4172 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4173 . j    - global column indices
4174 - a    - optional matrix values
4175 
4176   Output Parameter:
4177 . mat - the matrix
4178 
4179   Level: intermediate
4180 
4181   Notes:
4182   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4183   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4184   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4185 
4186   The `i` and `j` indices are 0 based, and the entries of `i` are offsets into the local `j` array.
4187 
4188   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4189 
4190   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4191   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4192 
4193   The format which is used for the sparse matrix input, is equivalent to a
4194   row-major ordering, i.e., for the following matrix, the input data expected is
4195   as shown
4196 .vb
4197         1 0 0
4198         2 0 3     P0
4199        -------
4200         4 5 6     P1
4201 
4202      Process0 [P0] rows_owned=[0,1]
4203         i =  {0,1,3}  [size = nrow+1  = 2+1]
4204         j =  {0,0,2}  [size = 3]
4205         v =  {1,2,3}  [size = 3]
4206 
4207      Process1 [P1] rows_owned=[2]
4208         i =  {0,3}    [size = nrow+1  = 1+1]
4209         j =  {0,1,2}  [size = 3]
4210         v =  {4,5,6}  [size = 3]
4211 .ve
4212 
4213 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4214           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4215 @*/
4216 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4217 {
4218   PetscFunctionBegin;
4219   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4220   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4221   PetscCall(MatCreate(comm, mat));
4222   PetscCall(MatSetSizes(*mat, m, n, M, N));
4223   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4224   PetscCall(MatSetType(*mat, MATMPIAIJ));
4225   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4226   PetscFunctionReturn(PETSC_SUCCESS);
4227 }
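/*
   A minimal sketch, again from the point of view of P0 in the example above;
   each rank passes the CSR triplet for its own rows. The arrays are copied,
   so they may be freed or modified immediately after the call.

     Mat         A;
     PetscInt    ia[] = {0, 1, 3}, ja[] = {0, 0, 2};
     PetscScalar va[] = {1.0, 2.0, 3.0};
     PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, ia, ja, va, &A));
*/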
4228 
4229 /*@
4230   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4231   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4232   to `MatCreateMPIAIJWithArrays()`
4233 
4234   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4235 
4236   Collective
4237 
4238   Input Parameters:
4239 + mat - the matrix
4240 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4241 . n   - This value should be the same as the local size used in creating the
4242        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4243        calculated if N is given). For square matrices n is almost always m.
4244 . M   - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
4245 . N   - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
4246 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4247 . J   - column indices
4248 - v   - matrix values
4249 
4250   Level: deprecated
4251 
4252 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4253           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4254 @*/
4255 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4256 {
4257   PetscInt        nnz, i;
4258   PetscBool       nooffprocentries;
4259   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4260   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4261   PetscScalar    *ad, *ao;
4262   PetscInt        ldi, Iii, md;
4263   const PetscInt *Adi = Ad->i;
4264   PetscInt       *ld  = Aij->ld;
4265 
4266   PetscFunctionBegin;
4267   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4268   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4269   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4270   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4271 
4272   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4273   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4274 
4275   for (i = 0; i < m; i++) {
4276     if (PetscDefined(USE_DEBUG)) {
4277       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4278         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4279         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4280       }
4281     }
4282     nnz = Ii[i + 1] - Ii[i];
4283     Iii = Ii[i];
4284     ldi = ld[i];
4285     md  = Adi[i + 1] - Adi[i];
4286     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4287     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4288     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4289     ad += md;
4290     ao += nnz - md;
4291   }
4292   nooffprocentries      = mat->nooffprocentries;
4293   mat->nooffprocentries = PETSC_TRUE;
4294   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4295   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4296   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4297   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4298   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4299   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4300   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4301   mat->nooffprocentries = nooffprocentries;
4302   PetscFunctionReturn(PETSC_SUCCESS);
4303 }
4304 
4305 /*@
4306   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4307 
4308   Collective
4309 
4310   Input Parameters:
4311 + mat - the matrix
4312 - v   - matrix values, stored by row
4313 
4314   Level: intermediate
4315 
4316   Notes:
4317   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4318 
4319   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4320 
4321 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4322           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4323 @*/
4324 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4325 {
4326   PetscInt        nnz, i, m;
4327   PetscBool       nooffprocentries;
4328   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4329   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4330   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4331   PetscScalar    *ad, *ao;
4332   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4333   PetscInt        ldi, Iii, md;
4334   PetscInt       *ld = Aij->ld;
4335 
4336   PetscFunctionBegin;
4337   m = mat->rmap->n;
4338 
4339   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4340   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4341   Iii = 0;
4342   for (i = 0; i < m; i++) {
4343     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4344     ldi = ld[i];
4345     md  = Adi[i + 1] - Adi[i];
4346     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4347     ad += md;
4348     if (ao) {
4349       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4350       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4351       ao += nnz - md;
4352     }
4353     Iii += nnz;
4354   }
4355   nooffprocentries      = mat->nooffprocentries;
4356   mat->nooffprocentries = PETSC_TRUE;
4357   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4358   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4359   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4360   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4361   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4362   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4363   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4364   mat->nooffprocentries = nooffprocentries;
4365   PetscFunctionReturn(PETSC_SUCCESS);
4366 }
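/*
   A minimal sketch of refreshing the numerical values of a matrix created with
   MatCreateMPIAIJWithArrays() (with sorted column indices, as required above);
   vnew is an assumed replacement for the P0 value array of the running example
   and must follow the exact CSR layout passed at creation.

     PetscScalar vnew[] = {10.0, 20.0, 30.0};
     PetscCall(MatUpdateMPIAIJWithArray(A, vnew));
*/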
4367 
4368 /*@
4369   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4370   (the default parallel PETSc format).  For good matrix assembly performance
4371   the user should preallocate the matrix storage by setting the parameters
4372   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4373 
4374   Collective
4375 
4376   Input Parameters:
4377 + comm  - MPI communicator
4378 . m     - number of local rows (or `PETSC_DECIDE` to have it calculated if M is given)
4379           This value should be the same as the local size used in creating the
4380           y vector for the matrix-vector product y = Ax.
4381 . n     - This value should be the same as the local size used in creating the
4382           x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4383           calculated if N is given). For square matrices n is almost always m.
4384 . M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
4385 . N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
4386 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4387           (same value is used for all local rows)
4388 . d_nnz - array containing the number of nonzeros in the various rows of the
4389           DIAGONAL portion of the local submatrix (possibly different for each row)
4390           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4391           The size of this array is equal to the number of local rows, i.e., `m`.
4392 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4393           submatrix (same value is used for all local rows).
4394 - o_nnz - array containing the number of nonzeros in the various rows of the
4395           OFF-DIAGONAL portion of the local submatrix (possibly different for
4396           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4397           structure. The size of this array is equal to the number
4398           of local rows, i.e., `m`.
4399 
4400   Output Parameter:
4401 . A - the matrix
4402 
4403   Options Database Keys:
4404 + -mat_no_inode                     - Do not use inodes
4405 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4406 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4407                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4408                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4409 
4410   Level: intermediate
4411 
4412   Notes:
4413   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4414   MatXXXXSetPreallocation() paradigm instead of calling this routine directly
4415   [MatXXXXSetPreallocation() stands for, e.g., `MatSeqAIJSetPreallocation()`]
4416 
4417   If the *_nnz parameter is given then the *_nz parameter is ignored
4418 
4419   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4420   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4421   storage requirements for this matrix.
4422 
4423   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4424   processor then it must be used on all processors that share the object for
4425   that argument.
4426 
4427   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4428   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4429 
4430   The user MUST specify either the local or global matrix dimensions
4431   (possibly both).
4432 
4433   The parallel matrix is partitioned across processors such that the
4434   first `m0` rows belong to process 0, the next `m1` rows belong to
4435   process 1, the next `m2` rows belong to process 2, etc., where
4436   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. That is, each MPI process stores
4437   values corresponding to an [m x N] submatrix.
4438 
4439   The columns are logically partitioned with the first n0 columns belonging
4440   to the 0th partition, the next n1 columns belonging to the next
4441   partition, etc., where n0,n1,n2... are the input parameter `n`.
4442 
4443   The DIAGONAL portion of the local submatrix on any given processor
4444   is the submatrix corresponding to the rows and columns m,n
4445   owned by the given processor, i.e., the diagonal matrix on
4446   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4447   etc. The remaining portion of the local submatrix [m x (N-n)]
4448   constitutes the OFF-DIAGONAL portion. The example below better
4449   illustrates this concept. The two matrices, the DIAGONAL portion and
4450   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4451 
4452   For a square global matrix we define each processor's diagonal portion
4453   to be its local rows and the corresponding columns (a square submatrix);
4454   each processor's off-diagonal portion encompasses the remainder of the
4455   local matrix (a rectangular submatrix).
4456 
4457   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4458 
4459   When calling this routine with a single process communicator, a matrix of
4460   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4461   type of communicator, use the construction mechanism
4462 .vb
4463   MatCreate(..., &A);
4464   MatSetType(A, MATMPIAIJ);
4465   MatSetSizes(A, m, n, M, N);
4466   MatMPIAIJSetPreallocation(A, ...);
4467 .ve
4468 
4469   By default, this format uses inodes (identical nodes) when possible.
4470   We search for consecutive rows with the same nonzero structure, thereby
4471   reusing matrix information to achieve increased efficiency.
4472 
4473   Example Usage:
4474   Consider the following 8x8 matrix with 34 non-zero values that is
4475   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4476   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4477   as follows
4478 
4479 .vb
4480             1  2  0  |  0  3  0  |  0  4
4481     Proc0   0  5  6  |  7  0  0  |  8  0
4482             9  0 10  | 11  0  0  | 12  0
4483     -------------------------------------
4484            13  0 14  | 15 16 17  |  0  0
4485     Proc1   0 18  0  | 19 20 21  |  0  0
4486             0  0  0  | 22 23  0  | 24  0
4487     -------------------------------------
4488     Proc2  25 26 27  |  0  0 28  | 29  0
4489            30  0  0  | 31 32 33  |  0 34
4490 .ve
4491 
4492   This can be represented as a collection of submatrices as
4493 
4494 .vb
4495       A B C
4496       D E F
4497       G H I
4498 .ve
4499 
4500   Where the submatrices A,B,C are owned by proc0, D,E,F are
4501   owned by proc1, G,H,I are owned by proc2.
4502 
4503   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4504   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4505   The 'M','N' parameters are 8,8, and have the same values on all procs.
4506 
4507   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4508   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4509   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4510   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4511   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4512   matrix, and [DF] as another `MATSEQAIJ` matrix.
4513 
4514   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4515   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4516   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4517   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4518   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4519   In this case, the values of `d_nz`,`o_nz` are
4520 .vb
4521      proc0  dnz = 2, o_nz = 2
4522      proc1  dnz = 3, o_nz = 2
4523      proc2  dnz = 1, o_nz = 4
4524 .ve
4525   We are allocating m*(`d_nz`+`o_nz`) storage locations on every proc. This
4526   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
4527   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4528   34 values.
4529 
4530   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4531   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4532   In the above case the values for `d_nnz`, `o_nnz` are
4533 .vb
4534      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4535      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4536      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4537 .ve
4538   Here the space allocated is the sum of all the above values, i.e., 34, and
4539   hence the preallocation is perfect.
4540 
4541 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4542           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4543           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4544 @*/
4545 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4546 {
4547   PetscMPIInt size;
4548 
4549   PetscFunctionBegin;
4550   PetscCall(MatCreate(comm, A));
4551   PetscCall(MatSetSizes(*A, m, n, M, N));
4552   PetscCallMPI(MPI_Comm_size(comm, &size));
4553   if (size > 1) {
4554     PetscCall(MatSetType(*A, MATMPIAIJ));
4555     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4556   } else {
4557     PetscCall(MatSetType(*A, MATSEQAIJ));
4558     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4559   }
4560   PetscFunctionReturn(PETSC_SUCCESS);
4561 }
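/*
   A minimal sketch of the single-call creation path, using the proc0 values of
   the 8x8 example above; on a communicator with one process this returns a
   MATSEQAIJ matrix, as noted in the manual page.

     Mat      A;
     PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2};
     PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A));
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/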
4562 
4563 /*MC
4564     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4565 
4566     Synopsis:
4567     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)
4568 
4569     Not Collective
4570 
4571     Input Parameter:
4572 .   A - the `MATMPIAIJ` matrix
4573 
4574     Output Parameters:
4575 +   Ad - the diagonal portion of the matrix
4576 .   Ao - the off-diagonal portion of the matrix
4577 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4578 -   ierr - error code
4579 
4580      Level: advanced
4581 
4582     Note:
4583     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4584 
4585 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4586 M*/
4587 
4588 /*MC
4589     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4590 
4591     Synopsis:
4592     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr)
4593 
4594     Not Collective
4595 
4596     Input Parameters:
4597 +   A - the `MATMPIAIJ` matrix
4598 .   Ad - the diagonal portion of the matrix
4599 .   Ao - the off-diagonal portion of the matrix
4600 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4601 -   ierr - error code
4602 
4603      Level: advanced
4604 
4605 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4606 M*/
4607 
4608 /*@C
4609   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4610 
4611   Not Collective
4612 
4613   Input Parameter:
4614 . A - The `MATMPIAIJ` matrix
4615 
4616   Output Parameters:
4617 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4618 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4619 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4620 
4621   Level: intermediate
4622 
4623   Note:
4624   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4625   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4626   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array `colmap` maps these
4627   local column numbers to global column numbers in the original matrix.
4628 
4629   Fortran Notes:
4630   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4631 
4632 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4633 @*/
4634 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4635 {
4636   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4637   PetscBool   flg;
4638 
4639   PetscFunctionBegin;
4640   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4641   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4642   if (Ad) *Ad = a->A;
4643   if (Ao) *Ao = a->B;
4644   if (colmap) *colmap = a->garray;
4645   PetscFunctionReturn(PETSC_SUCCESS);
4646 }
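/*
   A minimal sketch of inspecting the local blocks of an assembled MATMPIAIJ
   matrix A: the columns of Ao are numbered locally, and colmap translates them
   back to global column numbers.

     Mat             Ad, Ao;
     const PetscInt *colmap;
     PetscInt        nco;
     PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
     PetscCall(MatGetSize(Ao, NULL, &nco)); // number of condensed off-diagonal columns
     for (PetscInt c = 0; c < nco; c++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "local col %" PetscInt_FMT " -> global col %" PetscInt_FMT "\n", c, colmap[c]));
*/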
4647 
4648 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4649 {
4650   PetscInt     m, N, i, rstart, nnz, Ii;
4651   PetscInt    *indx;
4652   PetscScalar *values;
4653   MatType      rootType;
4654 
4655   PetscFunctionBegin;
4656   PetscCall(MatGetSize(inmat, &m, &N));
4657   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4658     PetscInt *dnz, *onz, sum, bs, cbs;
4659 
4660     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4661     /* Check sum(n) = N */
4662     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4663     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4664 
4665     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4666     rstart -= m;
4667 
4668     MatPreallocateBegin(comm, m, n, dnz, onz);
4669     for (i = 0; i < m; i++) {
4670       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4671       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4672       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4673     }
4674 
4675     PetscCall(MatCreate(comm, outmat));
4676     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4677     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4678     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4679     PetscCall(MatGetRootType_Private(inmat, &rootType));
4680     PetscCall(MatSetType(*outmat, rootType));
4681     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4682     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4683     MatPreallocateEnd(dnz, onz);
4684     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4685   }
4686 
4687   /* numeric phase */
4688   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4689   for (i = 0; i < m; i++) {
4690     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4691     Ii = i + rstart;
4692     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4693     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4694   }
4695   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4696   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4697   PetscFunctionReturn(PETSC_SUCCESS);
4698 }
4699 
4700 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4701 {
4702   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4703 
4704   PetscFunctionBegin;
4705   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4706   PetscCall(PetscFree(merge->id_r));
4707   PetscCall(PetscFree(merge->len_s));
4708   PetscCall(PetscFree(merge->len_r));
4709   PetscCall(PetscFree(merge->bi));
4710   PetscCall(PetscFree(merge->bj));
4711   PetscCall(PetscFree(merge->buf_ri[0]));
4712   PetscCall(PetscFree(merge->buf_ri));
4713   PetscCall(PetscFree(merge->buf_rj[0]));
4714   PetscCall(PetscFree(merge->buf_rj));
4715   PetscCall(PetscFree(merge->coi));
4716   PetscCall(PetscFree(merge->coj));
4717   PetscCall(PetscFree(merge->owners_co));
4718   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4719   PetscCall(PetscFree(merge));
4720   PetscFunctionReturn(PETSC_SUCCESS);
4721 }
4722 
4723 #include <../src/mat/utils/freespace.h>
4724 #include <petscbt.h>
4725 
4726 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4727 {
4728   MPI_Comm             comm;
4729   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4730   PetscMPIInt          size, rank, taga, *len_s;
4731   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4732   PetscMPIInt          proc, k;
4733   PetscInt           **buf_ri, **buf_rj;
4734   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4735   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4736   MPI_Request         *s_waits, *r_waits;
4737   MPI_Status          *status;
4738   const MatScalar     *aa, *a_a;
4739   MatScalar          **abuf_r, *ba_i;
4740   Mat_Merge_SeqsToMPI *merge;
4741   PetscContainer       container;
4742 
4743   PetscFunctionBegin;
4744   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4745   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4746 
4747   PetscCallMPI(MPI_Comm_size(comm, &size));
4748   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4749 
4750   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4751   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4752   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4753   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4754   aa = a_a;
4755 
4756   bi     = merge->bi;
4757   bj     = merge->bj;
4758   buf_ri = merge->buf_ri;
4759   buf_rj = merge->buf_rj;
4760 
4761   PetscCall(PetscMalloc1(size, &status));
4762   owners = merge->rowmap->range;
4763   len_s  = merge->len_s;
4764 
4765   /* send and recv matrix values */
4766   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4767   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4768 
4769   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4770   for (proc = 0, k = 0; proc < size; proc++) {
4771     if (!len_s[proc]) continue;
4772     i = owners[proc];
4773     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4774     k++;
4775   }
4776 
4777   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4778   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4779   PetscCall(PetscFree(status));
4780 
4781   PetscCall(PetscFree(s_waits));
4782   PetscCall(PetscFree(r_waits));
4783 
4784   /* insert mat values of mpimat */
4785   PetscCall(PetscMalloc1(N, &ba_i));
4786   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4787 
4788   for (k = 0; k < merge->nrecv; k++) {
4789     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4790     nrows       = *buf_ri_k[k];
4791     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4792     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4793   }
4794 
4795   /* set values of ba */
4796   m = merge->rowmap->n;
4797   for (i = 0; i < m; i++) {
4798     arow = owners[rank] + i;
4799     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4800     bnzi = bi[i + 1] - bi[i];
4801     PetscCall(PetscArrayzero(ba_i, bnzi));
4802 
4803     /* add local non-zero vals of this proc's seqmat into ba */
4804     anzi   = ai[arow + 1] - ai[arow];
4805     aj     = a->j + ai[arow];
4806     aa     = a_a + ai[arow];
4807     nextaj = 0;
4808     for (j = 0; nextaj < anzi; j++) {
4809       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4810         ba_i[j] += aa[nextaj++];
4811       }
4812     }
4813 
4814     /* add received vals into ba */
4815     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4816       /* i-th row */
4817       if (i == *nextrow[k]) {
4818         anzi   = *(nextai[k] + 1) - *nextai[k];
4819         aj     = buf_rj[k] + *nextai[k];
4820         aa     = abuf_r[k] + *nextai[k];
4821         nextaj = 0;
4822         for (j = 0; nextaj < anzi; j++) {
4823           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4824             ba_i[j] += aa[nextaj++];
4825           }
4826         }
4827         nextrow[k]++;
4828         nextai[k]++;
4829       }
4830     }
4831     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4832   }
4833   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4834   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4835   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4836 
4837   PetscCall(PetscFree(abuf_r[0]));
4838   PetscCall(PetscFree(abuf_r));
4839   PetscCall(PetscFree(ba_i));
4840   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4841   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4842   PetscFunctionReturn(PETSC_SUCCESS);
4843 }
4844 
4845 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4846 {
4847   Mat                  B_mpi;
4848   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4849   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4850   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4851   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4852   PetscInt             len, *dnz, *onz, bs, cbs;
4853   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4854   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4855   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4856   MPI_Status          *status;
4857   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4858   PetscBT              lnkbt;
4859   Mat_Merge_SeqsToMPI *merge;
4860   PetscContainer       container;
4861 
4862   PetscFunctionBegin;
4863   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4864 
4865   /* make sure it is a PETSc comm */
4866   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4867   PetscCallMPI(MPI_Comm_size(comm, &size));
4868   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4869 
4870   PetscCall(PetscNew(&merge));
4871   PetscCall(PetscMalloc1(size, &status));
4872 
4873   /* determine row ownership */
4874   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4875   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4876   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4877   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4878   PetscCall(PetscLayoutSetUp(merge->rowmap));
4879   PetscCall(PetscMalloc1(size, &len_si));
4880   PetscCall(PetscMalloc1(size, &merge->len_s));
4881 
4882   m      = merge->rowmap->n;
4883   owners = merge->rowmap->range;
4884 
4885   /* determine the number of messages to send, their lengths */
4886   len_s = merge->len_s;
4887 
4888   len          = 0; /* length of buf_si[] */
4889   merge->nsend = 0;
4890   for (PetscMPIInt proc = 0; proc < size; proc++) {
4891     len_si[proc] = 0;
4892     if (proc == rank) {
4893       len_s[proc] = 0;
4894     } else {
4895       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4896       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4897     }
4898     if (len_s[proc]) {
4899       merge->nsend++;
4900       nrows = 0;
4901       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4902         if (ai[i + 1] > ai[i]) nrows++;
4903       }
4904       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4905       len += len_si[proc];
4906     }
4907   }
4908 
4909   /* determine the number and length of messages to receive for ij-structure */
4910   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4911   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4912 
4913   /* post the Irecv of j-structure */
4914   PetscCall(PetscCommGetNewTag(comm, &tagj));
4915   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4916 
4917   /* post the Isend of j-structure */
4918   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4919 
4920   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4921     if (!len_s[proc]) continue;
4922     i = owners[proc];
4923     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4924     k++;
4925   }
4926 
4927   /* receives and sends of j-structure are complete */
4928   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4929   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4930 
4931   /* send and recv i-structure */
4932   PetscCall(PetscCommGetNewTag(comm, &tagi));
4933   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4934 
4935   PetscCall(PetscMalloc1(len + 1, &buf_s));
4936   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4937   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4938     if (!len_s[proc]) continue;
4939     /* form outgoing message for i-structure:
4940          buf_si[0]:                 nrows to be sent
4941                [1:nrows]:           row index (global)
4942                [nrows+1:2*nrows+1]: i-structure index
4943     */
4944     nrows       = len_si[proc] / 2 - 1;
4945     buf_si_i    = buf_si + nrows + 1;
4946     buf_si[0]   = nrows;
4947     buf_si_i[0] = 0;
4948     nrows       = 0;
4949     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4950       anzi = ai[i + 1] - ai[i];
4951       if (anzi) {
4952         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4953         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4954         nrows++;
4955       }
4956     }
4957     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4958     k++;
4959     buf_si += len_si[proc];
4960   }
4961 
4962   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4963   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4964 
4965   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4966   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4967 
4968   PetscCall(PetscFree(len_si));
4969   PetscCall(PetscFree(len_ri));
4970   PetscCall(PetscFree(rj_waits));
4971   PetscCall(PetscFree2(si_waits, sj_waits));
4972   PetscCall(PetscFree(ri_waits));
4973   PetscCall(PetscFree(buf_s));
4974   PetscCall(PetscFree(status));
4975 
4976   /* compute a local seq matrix on each processor */
4977   /* allocate bi array and free space for accumulating nonzero column info */
4978   PetscCall(PetscMalloc1(m + 1, &bi));
4979   bi[0] = 0;
4980 
4981   /* create and initialize a linked list */
4982   nlnk = N + 1;
4983   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4984 
4985   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4986   len = ai[owners[rank + 1]] - ai[owners[rank]];
4987   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4988 
4989   current_space = free_space;
4990 
4991   /* determine symbolic info for each local row */
4992   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4993 
4994   for (k = 0; k < merge->nrecv; k++) {
4995     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4996     nrows       = *buf_ri_k[k];
4997     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4998     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4999   }
5000 
5001   MatPreallocateBegin(comm, m, n, dnz, onz);
5002   len = 0;
5003   for (i = 0; i < m; i++) {
5004     bnzi = 0;
5005     /* add local non-zero cols of this proc's seqmat into lnk */
5006     arow = owners[rank] + i;
5007     anzi = ai[arow + 1] - ai[arow];
5008     aj   = a->j + ai[arow];
5009     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5010     bnzi += nlnk;
5011     /* add received col data into lnk */
5012     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5013       if (i == *nextrow[k]) {            /* i-th row */
5014         anzi = *(nextai[k] + 1) - *nextai[k];
5015         aj   = buf_rj[k] + *nextai[k];
5016         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5017         bnzi += nlnk;
5018         nextrow[k]++;
5019         nextai[k]++;
5020       }
5021     }
5022     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5023 
5024     /* if free space is not available, make more free space */
5025     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5026     /* copy data into free space, then initialize lnk */
5027     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5028     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5029 
5030     current_space->array += bnzi;
5031     current_space->local_used += bnzi;
5032     current_space->local_remaining -= bnzi;
5033 
5034     bi[i + 1] = bi[i] + bnzi;
5035   }
5036 
5037   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5038 
5039   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5040   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5041   PetscCall(PetscLLDestroy(lnk, lnkbt));
5042 
5043   /* create symbolic parallel matrix B_mpi */
5044   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5045   PetscCall(MatCreate(comm, &B_mpi));
5046   if (n == PETSC_DECIDE) {
5047     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5048   } else {
5049     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5050   }
5051   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5052   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5053   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5054   MatPreallocateEnd(dnz, onz);
5055   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5056 
5057   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5058   B_mpi->assembled = PETSC_FALSE;
5059   merge->bi        = bi;
5060   merge->bj        = bj;
5061   merge->buf_ri    = buf_ri;
5062   merge->buf_rj    = buf_rj;
5063   merge->coi       = NULL;
5064   merge->coj       = NULL;
5065   merge->owners_co = NULL;
5066 
5067   PetscCall(PetscCommDestroy(&comm));
5068 
5069   /* attach the supporting struct to B_mpi for reuse */
5070   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5071   PetscCall(PetscContainerSetPointer(container, merge));
5072   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5073   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5074   PetscCall(PetscContainerDestroy(&container));
5075   *mpimat = B_mpi;
5076 
5077   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5078   PetscFunctionReturn(PETSC_SUCCESS);
5079 }
5080 
5081 /*@
5082   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5083   matrices from each processor
5084 
5085   Collective
5086 
5087   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix (one per process)
5090 . m      - number of local rows (or `PETSC_DECIDE`)
5091 . n      - number of local columns (or `PETSC_DECIDE`)
5092 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5093 
5094   Output Parameter:
5095 . mpimat - the parallel matrix generated
5096 
5097   Level: advanced
5098 
5099   Note:
  The dimensions of the sequential matrix on each process MUST be the same.
  The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5103 
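  Example Usage:
  A minimal sketch, assuming each process already holds an assembled `MATSEQAIJ` matrix `seqmat` of identical dimensions:
.vb
  Mat mpimat;

  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat));
  /* ... change the values (but not the nonzero pattern) of seqmat ... */
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat));
.ve
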
5104 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5105 @*/
5106 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5107 {
5108   PetscMPIInt size;
5109 
5110   PetscFunctionBegin;
5111   PetscCallMPI(MPI_Comm_size(comm, &size));
5112   if (size == 1) {
5113     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5114     if (scall == MAT_INITIAL_MATRIX) {
5115       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5116     } else {
5117       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5118     }
5119     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5120     PetscFunctionReturn(PETSC_SUCCESS);
5121   }
5122   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5123   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5124   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5125   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5126   PetscFunctionReturn(PETSC_SUCCESS);
5127 }
5128 
5129 /*@
5130   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5131 
5132   Not Collective
5133 
5134   Input Parameter:
5135 . A - the matrix
5136 
5137   Output Parameter:
5138 . A_loc - the local sequential matrix generated
5139 
5140   Level: developer
5141 
5142   Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5148 
5149   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5150 
5151   Destroy the matrix with `MatDestroy()`
5152 
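  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATAIJ` matrix:
.vb
  Mat A_loc;

  PetscCall(MatAIJGetLocalMat(A, &A_loc));
  /* ... use A_loc as an ordinary sequential matrix ... */
  PetscCall(MatDestroy(&A_loc));
.ve
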
5153 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5154 @*/
5155 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5156 {
5157   PetscBool mpi;
5158 
5159   PetscFunctionBegin;
5160   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5161   if (mpi) {
5162     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5163   } else {
5164     *A_loc = A;
5165     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5166   }
5167   PetscFunctionReturn(PETSC_SUCCESS);
5168 }
5169 
5170 /*@
5171   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5172 
5173   Not Collective
5174 
5175   Input Parameters:
5176 + A     - the matrix
5177 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5178 
5179   Output Parameter:
5180 . A_loc - the local sequential matrix generated
5181 
5182   Level: developer
5183 
5184   Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5190 
5191   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5192   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5193   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5194   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5195 
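  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix:
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
  /* ... after the values (but not the pattern) of A change, refresh the local copy ... */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve
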
5196 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5197 @*/
5198 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5199 {
5200   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5201   Mat_SeqAIJ        *mat, *a, *b;
5202   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5203   const PetscScalar *aa, *ba, *aav, *bav;
5204   PetscScalar       *ca, *cam;
5205   PetscMPIInt        size;
5206   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5207   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5208   PetscBool          match;
5209 
5210   PetscFunctionBegin;
5211   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5212   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5213   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5214   if (size == 1) {
5215     if (scall == MAT_INITIAL_MATRIX) {
5216       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5217       *A_loc = mpimat->A;
5218     } else if (scall == MAT_REUSE_MATRIX) {
5219       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5220     }
5221     PetscFunctionReturn(PETSC_SUCCESS);
5222   }
5223 
5224   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5225   a  = (Mat_SeqAIJ *)mpimat->A->data;
5226   b  = (Mat_SeqAIJ *)mpimat->B->data;
5227   ai = a->i;
5228   aj = a->j;
5229   bi = b->i;
5230   bj = b->j;
5231   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5232   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5233   aa = aav;
5234   ba = bav;
5235   if (scall == MAT_INITIAL_MATRIX) {
5236     PetscCall(PetscMalloc1(1 + am, &ci));
5237     ci[0] = 0;
5238     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5239     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5240     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5241     k = 0;
5242     for (i = 0; i < am; i++) {
5243       ncols_o = bi[i + 1] - bi[i];
5244       ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: global columns left of the diagonal block */
5246       for (jo = 0; jo < ncols_o; jo++) {
5247         col = cmap[*bj];
5248         if (col >= cstart) break;
5249         cj[k] = col;
5250         bj++;
5251         ca[k++] = *ba++;
5252       }
5253       /* diagonal portion of A */
5254       for (j = 0; j < ncols_d; j++) {
5255         cj[k]   = cstart + *aj++;
5256         ca[k++] = *aa++;
5257       }
      /* off-diagonal portion of A: global columns right of the diagonal block */
5259       for (j = jo; j < ncols_o; j++) {
5260         cj[k]   = cmap[*bj++];
5261         ca[k++] = *ba++;
5262       }
5263     }
5264     /* put together the new matrix */
5265     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5266     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5267     /* Since these are PETSc arrays, change flags to free them as necessary. */
5268     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5269     mat->free_a  = PETSC_TRUE;
5270     mat->free_ij = PETSC_TRUE;
5271     mat->nonew   = 0;
5272   } else if (scall == MAT_REUSE_MATRIX) {
5273     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5274     ci  = mat->i;
5275     cj  = mat->j;
5276     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5277     for (i = 0; i < am; i++) {
      /* off-diagonal portion of A: global columns left of the diagonal block */
5279       ncols_o = bi[i + 1] - bi[i];
5280       for (jo = 0; jo < ncols_o; jo++) {
5281         col = cmap[*bj];
5282         if (col >= cstart) break;
5283         *cam++ = *ba++;
5284         bj++;
5285       }
5286       /* diagonal portion of A */
5287       ncols_d = ai[i + 1] - ai[i];
5288       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A: global columns right of the diagonal block */
5290       for (j = jo; j < ncols_o; j++) {
5291         *cam++ = *ba++;
5292         bj++;
5293       }
5294     }
5295     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5296   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5297   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5298   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5299   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5300   PetscFunctionReturn(PETSC_SUCCESS);
5301 }
5302 
5303 /*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5306 
5307   Not Collective
5308 
5309   Input Parameters:
5310 + A     - the matrix
5311 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5312 
5313   Output Parameters:
5314 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5315 - A_loc - the local sequential matrix generated
5316 
5317   Level: developer
5318 
5319   Note:
  This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
  part, then those associated with the off-diagonal part (in its local ordering)
5322 
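  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix:
.vb
  Mat A_loc;
  IS  glob;

  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
  /* entry i of glob is the global column of A corresponding to local column i of A_loc */
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&A_loc));
.ve
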
5323 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5324 @*/
5325 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5326 {
5327   Mat             Ao, Ad;
5328   const PetscInt *cmap;
5329   PetscMPIInt     size;
5330   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5331 
5332   PetscFunctionBegin;
5333   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5334   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5335   if (size == 1) {
5336     if (scall == MAT_INITIAL_MATRIX) {
5337       PetscCall(PetscObjectReference((PetscObject)Ad));
5338       *A_loc = Ad;
5339     } else if (scall == MAT_REUSE_MATRIX) {
5340       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5341     }
5342     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5343     PetscFunctionReturn(PETSC_SUCCESS);
5344   }
5345   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5346   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5347   if (f) {
5348     PetscCall((*f)(A, scall, glob, A_loc));
5349   } else {
5350     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5351     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5352     Mat_SeqAIJ        *c;
5353     PetscInt          *ai = a->i, *aj = a->j;
5354     PetscInt          *bi = b->i, *bj = b->j;
5355     PetscInt          *ci, *cj;
5356     const PetscScalar *aa, *ba;
5357     PetscScalar       *ca;
5358     PetscInt           i, j, am, dn, on;
5359 
5360     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5361     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5362     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5363     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5364     if (scall == MAT_INITIAL_MATRIX) {
5365       PetscInt k;
5366       PetscCall(PetscMalloc1(1 + am, &ci));
5367       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5368       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5369       ci[0] = 0;
5370       for (i = 0, k = 0; i < am; i++) {
5371         const PetscInt ncols_o = bi[i + 1] - bi[i];
5372         const PetscInt ncols_d = ai[i + 1] - ai[i];
5373         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5374         /* diagonal portion of A */
5375         for (j = 0; j < ncols_d; j++, k++) {
5376           cj[k] = *aj++;
5377           ca[k] = *aa++;
5378         }
5379         /* off-diagonal portion of A */
5380         for (j = 0; j < ncols_o; j++, k++) {
5381           cj[k] = dn + *bj++;
5382           ca[k] = *ba++;
5383         }
5384       }
5385       /* put together the new matrix */
5386       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5387       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5388       /* Since these are PETSc arrays, change flags to free them as necessary. */
5389       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5390       c->free_a  = PETSC_TRUE;
5391       c->free_ij = PETSC_TRUE;
5392       c->nonew   = 0;
5393       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5394     } else if (scall == MAT_REUSE_MATRIX) {
5395       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5396       for (i = 0; i < am; i++) {
5397         const PetscInt ncols_d = ai[i + 1] - ai[i];
5398         const PetscInt ncols_o = bi[i + 1] - bi[i];
5399         /* diagonal portion of A */
5400         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5401         /* off-diagonal portion of A */
5402         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5403       }
5404       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5405     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5406     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
    PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba)); /* ba was obtained from Ao above */
5408     if (glob) {
5409       PetscInt cst, *gidx;
5410 
5411       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5412       PetscCall(PetscMalloc1(dn + on, &gidx));
5413       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5414       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5415       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5416     }
5417   }
5418   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5419   PetscFunctionReturn(PETSC_SUCCESS);
5420 }
5421 
5422 /*@C
  MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5424 
5425   Not Collective
5426 
5427   Input Parameters:
5428 + A     - the matrix
5429 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5430 . row   - index set of rows to extract (or `NULL`)
5431 - col   - index set of columns to extract (or `NULL`)
5432 
5433   Output Parameter:
5434 . A_loc - the local sequential matrix generated
5435 
5436   Level: developer
5437 
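  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix; passing `NULL` for `row` and `col` selects all local rows and this process's nonzero columns:
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve
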
5438 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5439 @*/
5440 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5441 {
5442   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5443   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5444   IS          isrowa, iscola;
5445   Mat        *aloc;
5446   PetscBool   match;
5447 
5448   PetscFunctionBegin;
5449   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5450   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5451   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5452   if (!row) {
5453     start = A->rmap->rstart;
5454     end   = A->rmap->rend;
5455     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5456   } else {
5457     isrowa = *row;
5458   }
5459   if (!col) {
5460     start = A->cmap->rstart;
5461     cmap  = a->garray;
5462     nzA   = a->A->cmap->n;
5463     nzB   = a->B->cmap->n;
5464     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5465     ncols = 0;
5466     for (i = 0; i < nzB; i++) {
5467       if (cmap[i] < start) idx[ncols++] = cmap[i];
5468       else break;
5469     }
5470     imark = i;
5471     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5472     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5473     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5474   } else {
5475     iscola = *col;
5476   }
5477   if (scall != MAT_INITIAL_MATRIX) {
5478     PetscCall(PetscMalloc1(1, &aloc));
5479     aloc[0] = *A_loc;
5480   }
5481   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5482   if (!col) { /* attach global id of condensed columns */
5483     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5484   }
5485   *A_loc = aloc[0];
5486   PetscCall(PetscFree(aloc));
5487   if (!row) PetscCall(ISDestroy(&isrowa));
5488   if (!col) PetscCall(ISDestroy(&iscola));
5489   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5490   PetscFunctionReturn(PETSC_SUCCESS);
5491 }
5492 
5493 /*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
 * A row could be local or remote. The routine is designed to be scalable in memory, so that nothing it
 * allocates is based on a global size.
5497  * */
5498 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5499 {
5500   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5501   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5502   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5503   PetscMPIInt            owner;
5504   PetscSFNode           *iremote, *oiremote;
5505   const PetscInt        *lrowindices;
5506   PetscSF                sf, osf;
5507   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5508   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5509   MPI_Comm               comm;
5510   ISLocalToGlobalMapping mapping;
5511   const PetscScalar     *pd_a, *po_a;
5512 
5513   PetscFunctionBegin;
5514   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5515   /* plocalsize is the number of roots
5516    * nrows is the number of leaves
5517    * */
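  /* Illustrative example (hypothetical numbers): if this rank requests global row 7 and the row layout
   * assigns rows [4,10) to rank 2, then PetscLayoutFindOwnerIndex() below produces the leaf
   * iremote = {.index = 7 - 4 = 3, .rank = 2}
   * */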
5518   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5519   PetscCall(ISGetLocalSize(rows, &nrows));
5520   PetscCall(PetscCalloc1(nrows, &iremote));
5521   PetscCall(ISGetIndices(rows, &lrowindices));
5522   for (i = 0; i < nrows; i++) {
5523     /* Find a remote index and an owner for a row
5524      * The row could be local or remote
5525      * */
5526     owner = 0;
5527     lidx  = 0;
5528     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5529     iremote[i].index = lidx;
5530     iremote[i].rank  = owner;
5531   }
5532   /* Create SF to communicate how many nonzero columns for each row */
5533   PetscCall(PetscSFCreate(comm, &sf));
5534   /* SF will figure out the number of nonzero columns for each row, and their
5535    * offsets
5536    * */
5537   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5538   PetscCall(PetscSFSetFromOptions(sf));
5539   PetscCall(PetscSFSetUp(sf));
5540 
5541   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5542   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5543   PetscCall(PetscCalloc1(nrows, &pnnz));
5544   roffsets[0] = 0;
5545   roffsets[1] = 0;
5546   for (i = 0; i < plocalsize; i++) {
5547     /* diagonal */
5548     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5549     /* off-diagonal */
5550     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
5552     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5553     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5554   }
5555   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5556   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5557   /* 'r' means root, and 'l' means leaf */
5558   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5559   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5560   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5561   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5562   PetscCall(PetscSFDestroy(&sf));
5563   PetscCall(PetscFree(roffsets));
5564   PetscCall(PetscFree(nrcols));
5565   dntotalcols = 0;
5566   ontotalcols = 0;
5567   ncol        = 0;
5568   for (i = 0; i < nrows; i++) {
5569     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5570     ncol    = PetscMax(pnnz[i], ncol);
5571     /* diagonal */
5572     dntotalcols += nlcols[i * 2 + 0];
5573     /* off-diagonal */
5574     ontotalcols += nlcols[i * 2 + 1];
5575   }
  /* We do not need to determine the exact number of columns,
   * since all the calculations are done by going through the raw data
   * */
5579   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5580   PetscCall(MatSetUp(*P_oth));
5581   PetscCall(PetscFree(pnnz));
5582   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5583   /* diagonal */
5584   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5585   /* off-diagonal */
5586   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5587   /* diagonal */
5588   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5589   /* off-diagonal */
5590   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5591   dntotalcols = 0;
5592   ontotalcols = 0;
5593   ntotalcols  = 0;
5594   for (i = 0; i < nrows; i++) {
5595     owner = 0;
5596     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5597     /* Set iremote for diag matrix */
5598     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5599       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5600       iremote[dntotalcols].rank  = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5602       ilocal[dntotalcols++] = ntotalcols++;
5603     }
5604     /* off-diagonal */
5605     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5606       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5607       oiremote[ontotalcols].rank  = owner;
5608       oilocal[ontotalcols++]      = ntotalcols++;
5609     }
5610   }
5611   PetscCall(ISRestoreIndices(rows, &lrowindices));
5612   PetscCall(PetscFree(loffsets));
5613   PetscCall(PetscFree(nlcols));
5614   PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as the roots and P_oth as the leaves
   * Diagonal block
5617    * */
5618   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5619   PetscCall(PetscSFSetFromOptions(sf));
5620   PetscCall(PetscSFSetUp(sf));
5621 
5622   PetscCall(PetscSFCreate(comm, &osf));
5623   /* off-diagonal */
5624   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5625   PetscCall(PetscSFSetFromOptions(osf));
5626   PetscCall(PetscSFSetUp(osf));
5627   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5628   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5629   /* operate on the matrix internal data to save memory */
5630   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5631   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5632   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5633   /* Convert to global indices for diag matrix */
5634   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5635   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
5637   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5638   /* Use memory scalable approach */
5639   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5640   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5641   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5642   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5643   /* Convert back to local indices */
5644   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5645   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5646   nout = 0;
5647   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
5649   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5650   /* Exchange values */
5651   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5652   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5653   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5654   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5655   /* Stop PETSc from shrinking memory */
5656   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5657   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5658   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5659   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5660   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5661   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5662   PetscCall(PetscSFDestroy(&sf));
5663   PetscCall(PetscSFDestroy(&osf));
5664   PetscFunctionReturn(PETSC_SUCCESS);
5665 }
5666 
5667 /*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local part of A.
 * This supports MPIAIJ and MAIJ.
5670  * */
5671 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5672 {
5673   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5674   Mat_SeqAIJ *p_oth;
5675   IS          rows, map;
5676   PetscHMapI  hamp;
5677   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5678   MPI_Comm    comm;
5679   PetscSF     sf, osf;
5680   PetscBool   has;
5681 
5682   PetscFunctionBegin;
5683   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5684   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5685   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5686    *  and then create a submatrix (that often is an overlapping matrix)
5687    * */
5688   if (reuse == MAT_INITIAL_MATRIX) {
5689     /* Use a hash table to figure out unique keys */
5690     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5691     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5692     count = 0;
    /* Assume that a->garray is sorted; otherwise the following does not make sense */
5694     for (i = 0; i < a->B->cmap->n; i++) {
5695       key = a->garray[i] / dof;
5696       PetscCall(PetscHMapIHas(hamp, key, &has));
5697       if (!has) {
5698         mapping[i] = count;
5699         PetscCall(PetscHMapISet(hamp, key, count++));
5700       } else {
        /* The current 'i' has the same key as the previous step */
5702         mapping[i] = count - 1;
5703       }
5704     }
5705     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5706     PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, "Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5708     PetscCall(PetscCalloc1(htsize, &rowindices));
5709     off = 0;
5710     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5711     PetscCall(PetscHMapIDestroy(&hamp));
5712     PetscCall(PetscSortInt(htsize, rowindices));
5713     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created and the user wants to recreate it */
5715     PetscCall(MatDestroy(P_oth));
5716     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5717     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5718     PetscCall(ISDestroy(&map));
5719     PetscCall(ISDestroy(&rows));
5720   } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update the values using the SF objects
     * that were attached to the matrix earlier.
5723      */
5724     const PetscScalar *pd_a, *po_a;
5725 
5726     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5727     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5728     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5729     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5730     /* Update values in place */
5731     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5732     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5733     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5734     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5735     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5736     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5737     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5738     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5739   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5740   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5741   PetscFunctionReturn(PETSC_SUCCESS);
5742 }
5743 
5744 /*@C
  MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that correspond to the nonzero columns of the local part of `A`
5746 
5747   Collective
5748 
5749   Input Parameters:
5750 + A     - the first matrix in `MATMPIAIJ` format
5751 . B     - the second matrix in `MATMPIAIJ` format
5752 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5753 
5754   Output Parameters:
5755 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5756 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5757 - B_seq - the sequential matrix generated
5758 
5759   Level: developer
5760 
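  Example Usage:
  A minimal sketch, assuming `A` and `B` are assembled `MATMPIAIJ` matrices with compatible layouts:
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq;

  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
  /* when only the values of B have changed, reuse the saved index sets */
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve
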
5761 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5762 @*/
5763 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5764 {
5765   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5766   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5767   IS          isrowb, iscolb;
5768   Mat        *bseq = NULL;
5769 
5770   PetscFunctionBegin;
5771   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5772              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5773   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5774 
5775   if (scall == MAT_INITIAL_MATRIX) {
5776     start = A->cmap->rstart;
5777     cmap  = a->garray;
5778     nzA   = a->A->cmap->n;
5779     nzB   = a->B->cmap->n;
5780     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5781     ncols = 0;
5782     for (i = 0; i < nzB; i++) { /* row < local row index */
5783       if (cmap[i] < start) idx[ncols++] = cmap[i];
5784       else break;
5785     }
5786     imark = i;
5787     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5788     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5789     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5790     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5791   } else {
5792     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5793     isrowb = *rowb;
5794     iscolb = *colb;
5795     PetscCall(PetscMalloc1(1, &bseq));
5796     bseq[0] = *B_seq;
5797   }
5798   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5799   *B_seq = bseq[0];
5800   PetscCall(PetscFree(bseq));
5801   if (!rowb) {
5802     PetscCall(ISDestroy(&isrowb));
5803   } else {
5804     *rowb = isrowb;
5805   }
5806   if (!colb) {
5807     PetscCall(ISDestroy(&iscolb));
5808   } else {
5809     *colb = iscolb;
5810   }
5811   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5812   PetscFunctionReturn(PETSC_SUCCESS);
5813 }
5814 
5815 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local A
5818 
5819     Collective
5820 
5821    Input Parameters:
5822 +    A,B - the matrices in `MATMPIAIJ` format
5823 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5824 
   Output Parameters:
5826 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5827 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5828 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5829 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5830 
5831     Developer Note:
5832     This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5834 
5835     Level: developer
5836 
5837 */
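
/* A minimal usage sketch (hypothetical caller code, assuming more than one MPI process):

     PetscInt  *startsj_s = NULL, *startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat        B_oth;

     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
     // ... the values (but not the nonzero pattern) of B change ...
     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));

   The caller is responsible for eventually freeing the saved arrays and destroying B_oth.
*/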
5838 
5839 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5840 {
5841   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5842   VecScatter         ctx;
5843   MPI_Comm           comm;
5844   const PetscMPIInt *rprocs, *sprocs;
5845   PetscMPIInt        nrecvs, nsends;
5846   const PetscInt    *srow, *rstarts, *sstarts;
5847   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5848   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5849   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5850   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5851   PetscMPIInt        size, tag, rank, nreqs;
5852 
5853   PetscFunctionBegin;
5854   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5855   PetscCallMPI(MPI_Comm_size(comm, &size));
5856 
5857   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5858              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5859   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5860   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5861 
5862   if (size == 1) {
    if (startsj_s) *startsj_s = NULL; /* these are output pointers; assigning the local parameters would have no effect */
    if (bufa_ptr) *bufa_ptr = NULL;
5865     *B_oth    = NULL;
5866     PetscFunctionReturn(PETSC_SUCCESS);
5867   }
5868 
5869   ctx = a->Mvctx;
5870   tag = ((PetscObject)ctx)->tag;
5871 
5872   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5873   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5874   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5875   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5876   PetscCall(PetscMalloc1(nreqs, &reqs));
5877   rwaits = reqs;
5878   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5879 
5880   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5881   if (scall == MAT_INITIAL_MATRIX) {
5882     /* i-array */
5883     /*  post receives */
5884     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5885     for (i = 0; i < nrecvs; i++) {
5886       rowlen = rvalues + rstarts[i] * rbs;
5887       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5888       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5889     }
5890 
5891     /* pack the outgoing message */
5892     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5893 
5894     sstartsj[0] = 0;
5895     rstartsj[0] = 0;
5896     len         = 0; /* total length of j or a array to be sent */
5897     if (nsends) {
5898       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5899       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5900     }
5901     for (i = 0; i < nsends; i++) {
5902       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5903       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5904       for (j = 0; j < nrows; j++) {
5905         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5906         for (l = 0; l < sbs; l++) {
5907           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5908 
5909           rowlen[j * sbs + l] = ncols;
5910 
5911           len += ncols;
5912           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5913         }
5914         k++;
5915       }
5916       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5917 
5918       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5919     }
5920     /* recvs and sends of i-array are completed */
5921     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5922     PetscCall(PetscFree(svalues));
5923 
5924     /* allocate buffers for sending j and a arrays */
5925     PetscCall(PetscMalloc1(len + 1, &bufj));
5926     PetscCall(PetscMalloc1(len + 1, &bufa));
5927 
5928     /* create i-array of B_oth */
5929     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5930 
5931     b_othi[0] = 0;
5932     len       = 0; /* total length of j or a array to be received */
5933     k         = 0;
5934     for (i = 0; i < nrecvs; i++) {
5935       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5936       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5937       for (j = 0; j < nrows; j++) {
5938         b_othi[k + 1] = b_othi[k] + rowlen[j];
5939         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5940         k++;
5941       }
5942       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5943     }
5944     PetscCall(PetscFree(rvalues));
5945 
5946     /* allocate space for j and a arrays of B_oth */
5947     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5948     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5949 
5950     /* j-array */
5951     /*  post receives of j-array */
5952     for (i = 0; i < nrecvs; i++) {
5953       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5954       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5955     }
5956 
5957     /* pack the outgoing message j-array */
5958     if (nsends) k = sstarts[0];
5959     for (i = 0; i < nsends; i++) {
5960       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5961       bufJ  = bufj + sstartsj[i];
5962       for (j = 0; j < nrows; j++) {
5963         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5964         for (ll = 0; ll < sbs; ll++) {
5965           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5966           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5967           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5968         }
5969       }
5970       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5971     }
5972 
5973     /* recvs and sends of j-array are completed */
5974     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5975   } else if (scall == MAT_REUSE_MATRIX) {
5976     sstartsj = *startsj_s;
5977     rstartsj = *startsj_r;
5978     bufa     = *bufa_ptr;
5979     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5981 
5982   /* a-array */
5983   /*  post receives of a-array */
5984   for (i = 0; i < nrecvs; i++) {
5985     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5986     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5987   }
5988 
5989   /* pack the outgoing message a-array */
5990   if (nsends) k = sstarts[0];
5991   for (i = 0; i < nsends; i++) {
5992     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5993     bufA  = bufa + sstartsj[i];
5994     for (j = 0; j < nrows; j++) {
5995       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5996       for (ll = 0; ll < sbs; ll++) {
5997         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5998         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5999         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6000       }
6001     }
6002     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6003   }
6004   /* recvs and sends of a-array are completed */
6005   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6006   PetscCall(PetscFree(reqs));
6007 
6008   if (scall == MAT_INITIAL_MATRIX) {
6009     Mat_SeqAIJ *b_oth;
6010 
6011     /* put together the new matrix */
6012     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6013 
6014     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6015     /* Since these are PETSc arrays, change flags to free them as necessary. */
6016     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6017     b_oth->free_a  = PETSC_TRUE;
6018     b_oth->free_ij = PETSC_TRUE;
6019     b_oth->nonew   = 0;
6020 
6021     PetscCall(PetscFree(bufj));
6022     if (!startsj_s || !bufa_ptr) {
6023       PetscCall(PetscFree2(sstartsj, rstartsj));
6024       PetscCall(PetscFree(bufa_ptr));
6025     } else {
6026       *startsj_s = sstartsj;
6027       *startsj_r = rstartsj;
6028       *bufa_ptr  = bufa;
6029     }
6030   } else if (scall == MAT_REUSE_MATRIX) {
6031     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6032   }
6033 
6034   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6035   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6036   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6037   PetscFunctionReturn(PETSC_SUCCESS);
6038 }
6039 
6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6043 #if defined(PETSC_HAVE_MKL_SPARSE)
6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6045 #endif
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6048 #if defined(PETSC_HAVE_ELEMENTAL)
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6050 #endif
6051 #if defined(PETSC_HAVE_SCALAPACK)
6052 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6053 #endif
6054 #if defined(PETSC_HAVE_HYPRE)
6055 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6056 #endif
6057 #if defined(PETSC_HAVE_CUDA)
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6059 #endif
6060 #if defined(PETSC_HAVE_HIP)
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6062 #endif
6063 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6065 #endif
6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6067 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6068 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6069 
6070 /*
6071     Computes (B'*A')' since computing B*A directly is untenable
6072 
6073                n                       p                          p
6074         [             ]       [             ]         [                 ]
6075       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6076         [             ]       [             ]         [                 ]
6077 
6078 */
6079 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6080 {
6081   Mat At, Bt, Ct;
6082 
6083   PetscFunctionBegin;
6084   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6085   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6086   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6087   PetscCall(MatDestroy(&At));
6088   PetscCall(MatDestroy(&Bt));
6089   PetscCall(MatTransposeSetPrecursor(Ct, C));
6090   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6091   PetscCall(MatDestroy(&Ct));
6092   PetscFunctionReturn(PETSC_SUCCESS);
6093 }
6094 
6095 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6096 {
6097   PetscBool cisdense;
6098 
6099   PetscFunctionBegin;
6100   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6101   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6102   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6103   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6104   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6105   PetscCall(MatSetUp(C));
6106 
6107   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6108   PetscFunctionReturn(PETSC_SUCCESS);
6109 }
6110 
6111 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6112 {
6113   Mat_Product *product = C->product;
6114   Mat          A = product->A, B = product->B;
6115 
6116   PetscFunctionBegin;
6117   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6118              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6119   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6120   C->ops->productsymbolic = MatProductSymbolic_AB;
6121   PetscFunctionReturn(PETSC_SUCCESS);
6122 }
6123 
6124 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6125 {
6126   Mat_Product *product = C->product;
6127 
6128   PetscFunctionBegin;
6129   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6130   PetscFunctionReturn(PETSC_SUCCESS);
6131 }
6132 
6133 /*
6134    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6135 
6136   Input Parameters:
6137 
6138     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6139     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6140 
6141     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6142 
6143     For Set1, j1[] contains column indices of the nonzeros.
6144     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6146     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6147 
6148     Similar for Set2.
6149 
6150     This routine merges the two sets of nonzeros row by row and removes repeats.
6151 
6152   Output Parameters: (memory is allocated by the caller)
6153 
6154     i[],j[]: the CSR of the merged matrix, which has m rows.
6155     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6156     imap2[]: similar to imap1[], but for Set2.
6157     Note we order nonzeros row-by-row and from left to right.
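
    A small worked example (hypothetical data, for illustration only): take m = 1 and let
    Set1 have j1[] = {1,1,4} with jmap1[] = {0,2,3} (column 1 repeats twice), and
    Set2 have j2[] = {2,4} with jmap2[] = {0,1,2}. The merged row is then j[] = {1,2,4},
    with i[] = {0,3}, imap1[] = {0,2} and imap2[] = {1,2}.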
6158 */
6159 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6160 {
6161   PetscInt   r, m; /* Row index of mat */
6162   PetscCount t, t1, t2, b1, e1, b2, e2;
6163 
6164   PetscFunctionBegin;
6165   PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
6167   i[0]        = 0;
6168   for (r = 0; r < m; r++) { /* Do row by row merging */
6169     b1 = rowBegin1[r];
6170     e1 = rowEnd1[r];
6171     b2 = rowBegin2[r];
6172     e2 = rowEnd2[r];
6173     while (b1 < e1 && b2 < e2) {
6174       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6175         j[t]      = j1[b1];
6176         imap1[t1] = t;
6177         imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */
6180         t1++;
6181         t2++;
6182         t++;
6183       } else if (j1[b1] < j2[b2]) {
6184         j[t]      = j1[b1];
6185         imap1[t1] = t;
6186         b1 += jmap1[t1 + 1] - jmap1[t1];
6187         t1++;
6188         t++;
6189       } else {
6190         j[t]      = j2[b2];
6191         imap2[t2] = t;
6192         b2 += jmap2[t2 + 1] - jmap2[t2];
6193         t2++;
6194         t++;
6195       }
6196     }
6197     /* Merge the remaining in either j1[] or j2[] */
6198     while (b1 < e1) {
6199       j[t]      = j1[b1];
6200       imap1[t1] = t;
6201       b1 += jmap1[t1 + 1] - jmap1[t1];
6202       t1++;
6203       t++;
6204     }
6205     while (b2 < e2) {
6206       j[t]      = j2[b2];
6207       imap2[t2] = t;
6208       b2 += jmap2[t2 + 1] - jmap2[t2];
6209       t2++;
6210       t++;
6211     }
6212     PetscCall(PetscIntCast(t, i + r + 1));
6213   }
6214   PetscFunctionReturn(PETSC_SUCCESS);
6215 }
6216 
6217 /*
6218   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6219 
6220   Input Parameters:
6221     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6222     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6223       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6224 
6225       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6226       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6227 
6228   Output Parameters:
6229     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6230     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6231       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6232       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6233 
6234     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6235       Atot: number of entries belonging to the diagonal block.
6236       Annz: number of unique nonzeros belonging to the diagonal block.
6237       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6238         repeats (i.e., same 'i,j' pair).
6239       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6240         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6241 
6242       Atot: number of entries belonging to the diagonal block
6245     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6246 
6247     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
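
  Example (hypothetical values, for illustration):
    Suppose the diagonal block owns columns [0,4) and one local row carries the n=5 entries
      j[]    = [5, 2, 2, 6, 1],  perm[] = [0, 1, 2, 3, 4]
    After sorting (with diagonal columns moved ahead of off-diagonal ones) the row becomes
      j[]    = [1, 2, 2, 5, 6],  perm[] = [4, 1, 2, 0, 3]   (the order among repeats may vary)
    so that rowBegin = 0, rowMid = 3, rowEnd = 5, and
      Atot = 3, Annz = 2, Aperm[] = [4,1,2], Ajmap[] = [0,1,3]
      Btot = 2, Bnnz = 2, Bperm[] = [0,3],   Bjmap[] = [0,1,2]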
6248 */
6249 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6250 {
6251   PetscInt    cstart, cend, rstart, rend, row, col;
6252   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6253   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6254   PetscCount  k, m, p, q, r, s, mid;
6255   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6256 
6257   PetscFunctionBegin;
6258   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6259   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6260   m = rend - rstart;
6261 
6262   /* Skip negative rows */
6263   for (k = 0; k < n; k++)
6264     if (i[k] >= 0) break;
6265 
6266   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6267      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6268   */
6269   while (k < n) {
6270     row = i[k];
6271     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6272     for (s = k; s < n; s++)
6273       if (i[s] != row) break;
6274 
6275     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6276     for (p = k; p < s; p++) {
6277       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6279     }
6280     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6281     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6282     rowBegin[row - rstart] = k;
6283     rowMid[row - rstart]   = mid;
6284     rowEnd[row - rstart]   = s;
6285 
6286     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6287     Atot += mid - k;
6288     Btot += s - mid;
6289 
6290     /* Count unique nonzeros of this diag row */
6291     for (p = k; p < mid;) {
6292       col = j[p];
6293       do {
6294         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6295         p++;
6296       } while (p < mid && j[p] == col);
6297       Annz++;
6298     }
6299 
6300     /* Count unique nonzeros of this offdiag row */
6301     for (p = mid; p < s;) {
6302       col = j[p];
6303       do {
6304         p++;
6305       } while (p < s && j[p] == col);
6306       Bnnz++;
6307     }
6308     k = s;
6309   }
6310 
6311   /* Allocation according to Atot, Btot, Annz, Bnnz */
6312   PetscCall(PetscMalloc1(Atot, &Aperm));
6313   PetscCall(PetscMalloc1(Btot, &Bperm));
6314   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6315   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6316 
6317   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6318   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6319   for (r = 0; r < m; r++) {
6320     k   = rowBegin[r];
6321     mid = rowMid[r];
6322     s   = rowEnd[r];
6323     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6324     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6325     Atot += mid - k;
6326     Btot += s - mid;
6327 
6328     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6329     for (p = k; p < mid;) {
6330       col = j[p];
6331       q   = p;
6332       do {
6333         p++;
6334       } while (p < mid && j[p] == col);
6335       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6336       Annz++;
6337     }
6338 
6339     for (p = mid; p < s;) {
6340       col = j[p];
6341       q   = p;
6342       do {
6343         p++;
6344       } while (p < s && j[p] == col);
6345       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6346       Bnnz++;
6347     }
6348   }
6349   /* Output */
6350   *Aperm_ = Aperm;
6351   *Annz_  = Annz;
6352   *Atot_  = Atot;
6353   *Ajmap_ = Ajmap;
6354   *Bperm_ = Bperm;
6355   *Bnnz_  = Bnnz;
6356   *Btot_  = Btot;
6357   *Bjmap_ = Bjmap;
6358   PetscFunctionReturn(PETSC_SUCCESS);
6359 }
6360 
6361 /*
6362   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6363 
6364   Input Parameters:
6365     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6366     nnz:  number of unique nonzeros in the merged matrix
6367     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6368     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6369 
6370   Output Parameter: (memory is allocated by the caller)
6371     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6372 
6373   Example:
6374     nnz1 = 4
6375     nnz  = 6
6376     imap = [1,3,4,5]
6377     jmap = [0,3,5,6,7]
6378    then,
6379     jmap_new = [0,0,3,3,5,6,7]
6380 */
6381 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6382 {
6383   PetscCount k, p;
6384 
6385   PetscFunctionBegin;
6386   jmap_new[0] = 0;
6387   p           = nnz;                /* p loops over jmap_new[] backwards */
6388   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6389     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6390   }
6391   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6392   PetscFunctionReturn(PETSC_SUCCESS);
6393 }
6394 
6395 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6396 {
6397   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6398 
6399   PetscFunctionBegin;
6400   PetscCall(PetscSFDestroy(&coo->sf));
6401   PetscCall(PetscFree(coo->Aperm1));
6402   PetscCall(PetscFree(coo->Bperm1));
6403   PetscCall(PetscFree(coo->Ajmap1));
6404   PetscCall(PetscFree(coo->Bjmap1));
6405   PetscCall(PetscFree(coo->Aimap2));
6406   PetscCall(PetscFree(coo->Bimap2));
6407   PetscCall(PetscFree(coo->Aperm2));
6408   PetscCall(PetscFree(coo->Bperm2));
6409   PetscCall(PetscFree(coo->Ajmap2));
6410   PetscCall(PetscFree(coo->Bjmap2));
6411   PetscCall(PetscFree(coo->Cperm1));
6412   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6413   PetscCall(PetscFree(coo));
6414   PetscFunctionReturn(PETSC_SUCCESS);
6415 }
6416 
6417 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6418 {
6419   MPI_Comm             comm;
6420   PetscMPIInt          rank, size;
6421   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6422   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6423   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6424   PetscContainer       container;
6425   MatCOOStruct_MPIAIJ *coo;
6426 
6427   PetscFunctionBegin;
6428   PetscCall(PetscFree(mpiaij->garray));
6429   PetscCall(VecDestroy(&mpiaij->lvec));
6430 #if defined(PETSC_USE_CTABLE)
6431   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6432 #else
6433   PetscCall(PetscFree(mpiaij->colmap));
6434 #endif
6435   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6436   mat->assembled     = PETSC_FALSE;
6437   mat->was_assembled = PETSC_FALSE;
6438 
6439   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6440   PetscCallMPI(MPI_Comm_size(comm, &size));
6441   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6442   PetscCall(PetscLayoutSetUp(mat->rmap));
6443   PetscCall(PetscLayoutSetUp(mat->cmap));
6444   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6445   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6446   PetscCall(MatGetLocalSize(mat, &m, &n));
6447   PetscCall(MatGetSize(mat, &M, &N));
6448 
6449   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6450   /* entries come first, then local rows, then remote rows.                     */
6451   PetscCount n1 = coo_n, *perm1;
6452   PetscInt  *i1 = coo_i, *j1 = coo_j;
6453 
6454   PetscCall(PetscMalloc1(n1, &perm1));
6455   for (k = 0; k < n1; k++) perm1[k] = k;
6456 
6457   /* Manipulate indices so that entries with negative row or col indices will have smallest
6458      row indices, local entries will have greater but negative row indices, and remote entries
6459      will have positive row indices.
6460   */
6461   for (k = 0; k < n1; k++) {
6462     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6463     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6464     else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but an entry is being inserted into a remote row");
6466       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6467     }
6468   }
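
  /* For illustration (hypothetical): with rstart=4, rend=8, an entry in local row 5 is
     relabeled 5 - PETSC_INT_MAX (negative), an ignored entry becomes PETSC_INT_MIN, and an
     entry in remote row 10 keeps index 10; sorting the relabeled indices thus groups the
     ignored entries first, then local rows, then remote rows */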
6469 
6470   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6471   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6472 
6473   /* Advance k to the first entry we need to take care of */
6474   for (k = 0; k < n1; k++)
6475     if (i1[k] > PETSC_INT_MIN) break;
6476   PetscCount i1start = k;
6477 
6478   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
  for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6480 
6481   /*           Send remote rows to their owner                                  */
  /* Find which rows should be sent to which remote ranks */
6483   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6484   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6485   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6486   const PetscInt *ranges;
6487   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6488 
6489   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6490   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6491   for (k = rem; k < n1;) {
6492     PetscMPIInt owner;
6493     PetscInt    firstRow, lastRow;
6494 
6495     /* Locate a row range */
6496     firstRow = i1[k]; /* first row of this owner */
6497     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6498     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6499 
    /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6501     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6502 
6503     /* All entries in [k,p) belong to this remote owner */
6504     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6505       PetscMPIInt *sendto2;
6506       PetscInt    *nentries2;
6507       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6508 
6509       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6510       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
      PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
      PetscCall(PetscFree2(sendto, nentries));
6513       sendto   = sendto2;
6514       nentries = nentries2;
6515       maxNsend = maxNsend2;
6516     }
6517     sendto[nsend] = owner;
6518     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6519     nsend++;
6520     k = p;
6521   }
6522 
6523   /* Build 1st SF to know offsets on remote to send data */
6524   PetscSF      sf1;
6525   PetscInt     nroots = 1, nroots2 = 0;
6526   PetscInt     nleaves = nsend, nleaves2 = 0;
6527   PetscInt    *offsets;
6528   PetscSFNode *iremote;
6529 
6530   PetscCall(PetscSFCreate(comm, &sf1));
6531   PetscCall(PetscMalloc1(nsend, &iremote));
6532   PetscCall(PetscMalloc1(nsend, &offsets));
6533   for (k = 0; k < nsend; k++) {
6534     iremote[k].rank  = sendto[k];
6535     iremote[k].index = 0;
6536     nleaves2 += nentries[k];
6537     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6538   }
6539   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6540   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the check on offsets[] below catches it */
6542   PetscCall(PetscSFDestroy(&sf1));
6543   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
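
  /* For illustration (hypothetical): if ranks 1 and 2 both send to rank 0, with 3 and 5
     entries respectively, each sender's fetch-and-add returns the sum of the counts that
     arrived before it as its offset (e.g., 0 and 3 if rank 1's count lands first), and
     rank 0 ends up with nroots2 = 8 */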
6544 
6545   /* Build 2nd SF to send remote COOs to their owner */
6546   PetscSF sf2;
6547   nroots  = nroots2;
6548   nleaves = nleaves2;
6549   PetscCall(PetscSFCreate(comm, &sf2));
6550   PetscCall(PetscSFSetFromOptions(sf2));
6551   PetscCall(PetscMalloc1(nleaves, &iremote));
6552   p = 0;
6553   for (k = 0; k < nsend; k++) {
6554     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6555     for (q = 0; q < nentries[k]; q++, p++) {
6556       iremote[p].rank = sendto[k];
6557       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6558     }
6559   }
6560   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
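
  /* For illustration (hypothetical): continuing the example above, the rank sending 3 entries
     creates 3 leaves (owner, offset+0), (owner, offset+1), (owner, offset+2), i.e., a
     contiguous slot range in the owner's receive space */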
6561 
6562   /* Send the remote COOs to their owner */
6563   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6564   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6565   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6566   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6567   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6568   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6569   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6570   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6571   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6572   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6573   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6574 
6575   PetscCall(PetscFree(offsets));
6576   PetscCall(PetscFree2(sendto, nentries));
6577 
6578   /* Sort received COOs by row along with the permutation array     */
6579   for (k = 0; k < n2; k++) perm2[k] = k;
6580   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6581 
6582   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6583   PetscCount *Cperm1;
6584   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6585   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6586   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6587   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6588 
6589   /* Support for HYPRE matrices, kind of a hack.
6590      Swap min column with diagonal so that diagonal values will go first */
6591   PetscBool hypre;
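  /* For illustration (hypothetical): if local row 3 (with rstart = cstart = 0) holds columns
     {1, 3, 7}, then minj = 1 and the diagonal is present, so entries with column 1 are
     relabeled 3 and the diagonal entries are relabeled 1; after the per-row sort the
     diagonal value therefore comes first */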
6592   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6593   if (hypre) {
6594     PetscInt *minj;
6595     PetscBT   hasdiag;
6596 
6597     PetscCall(PetscBTCreate(m, &hasdiag));
6598     PetscCall(PetscMalloc1(m, &minj));
6599     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6600     for (k = i1start; k < rem; k++) {
6601       if (j1[k] < cstart || j1[k] >= cend) continue;
6602       const PetscInt rindex = i1[k] - rstart;
6603       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6604       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6605     }
6606     for (k = 0; k < n2; k++) {
6607       if (j2[k] < cstart || j2[k] >= cend) continue;
6608       const PetscInt rindex = i2[k] - rstart;
6609       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6610       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6611     }
6612     for (k = i1start; k < rem; k++) {
6613       const PetscInt rindex = i1[k] - rstart;
6614       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6615       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6616       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6617     }
6618     for (k = 0; k < n2; k++) {
6619       const PetscInt rindex = i2[k] - rstart;
6620       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6621       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6622       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6623     }
6624     PetscCall(PetscBTDestroy(&hasdiag));
6625     PetscCall(PetscFree(minj));
6626   }
6627 
6628   /* Split local COOs and received COOs into diag/offdiag portions */
6629   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6630   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6631   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6632   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6633   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6634   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6635 
6636   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6637   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6638   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6639   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6640 
6641   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6642   PetscInt *Ai, *Bi;
6643   PetscInt *Aj, *Bj;
6644 
6645   PetscCall(PetscMalloc1(m + 1, &Ai));
6646   PetscCall(PetscMalloc1(m + 1, &Bi));
6647   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6648   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6649 
6650   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6651   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6652   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6653   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6654   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6655 
6656   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6657   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6658 
  /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since nonzeros */
  /* in A/B most likely have local contributing entries                          */
6661   PetscInt    Annz = Ai[m];
6662   PetscInt    Bnnz = Bi[m];
6663   PetscCount *Ajmap1_new, *Bjmap1_new;
6664 
6665   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6666   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6667 
6668   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6669   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6670 
6671   PetscCall(PetscFree(Aimap1));
6672   PetscCall(PetscFree(Ajmap1));
6673   PetscCall(PetscFree(Bimap1));
6674   PetscCall(PetscFree(Bjmap1));
6675   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6676   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6677   PetscCall(PetscFree(perm1));
6678   PetscCall(PetscFree3(i2, j2, perm2));
6679 
6680   Ajmap1 = Ajmap1_new;
6681   Bjmap1 = Bjmap1_new;
6682 
6683   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6684   if (Annz < Annz1 + Annz2) {
6685     PetscInt *Aj_new;
6686     PetscCall(PetscMalloc1(Annz, &Aj_new));
6687     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6688     PetscCall(PetscFree(Aj));
6689     Aj = Aj_new;
6690   }
6691 
6692   if (Bnnz < Bnnz1 + Bnnz2) {
6693     PetscInt *Bj_new;
6694     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6695     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6696     PetscCall(PetscFree(Bj));
6697     Bj = Bj_new;
6698   }
6699 
6700   /* Create new submatrices for on-process and off-process coupling                  */
6701   PetscScalar     *Aa, *Ba;
6702   MatType          rtype;
6703   Mat_SeqAIJ      *a, *b;
6704   PetscObjectState state;
  PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero-initialized matrix values */
6706   PetscCall(PetscCalloc1(Bnnz, &Ba));
  /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6708   if (cstart) {
6709     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6710   }
6711 
6712   PetscCall(MatGetRootType_Private(mat, &rtype));
6713 
6714   MatSeqXAIJGetOptions_Private(mpiaij->A);
6715   PetscCall(MatDestroy(&mpiaij->A));
6716   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6717   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6718   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6719 
6720   MatSeqXAIJGetOptions_Private(mpiaij->B);
6721   PetscCall(MatDestroy(&mpiaij->B));
6722   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6723   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6724   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6725 
6726   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6727   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6728   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6729   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6730 
6731   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6732   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6733   a->free_a  = PETSC_TRUE;
6734   a->free_ij = PETSC_TRUE;
6735   b->free_a  = PETSC_TRUE;
6736   b->free_ij = PETSC_TRUE;
6737   a->maxnz   = a->nz;
6738   b->maxnz   = b->nz;
6739 
6740   /* conversion must happen AFTER multiply setup */
6741   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6742   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6743   PetscCall(VecDestroy(&mpiaij->lvec));
6744   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6745 
6746   // Put the COO struct in a container and then attach that to the matrix
6747   PetscCall(PetscMalloc1(1, &coo));
6748   coo->n       = coo_n;
6749   coo->sf      = sf2;
6750   coo->sendlen = nleaves;
6751   coo->recvlen = nroots;
6752   coo->Annz    = Annz;
6753   coo->Bnnz    = Bnnz;
6754   coo->Annz2   = Annz2;
6755   coo->Bnnz2   = Bnnz2;
6756   coo->Atot1   = Atot1;
6757   coo->Atot2   = Atot2;
6758   coo->Btot1   = Btot1;
6759   coo->Btot2   = Btot2;
6760   coo->Ajmap1  = Ajmap1;
6761   coo->Aperm1  = Aperm1;
6762   coo->Bjmap1  = Bjmap1;
6763   coo->Bperm1  = Bperm1;
6764   coo->Aimap2  = Aimap2;
6765   coo->Ajmap2  = Ajmap2;
6766   coo->Aperm2  = Aperm2;
6767   coo->Bimap2  = Bimap2;
6768   coo->Bjmap2  = Bjmap2;
6769   coo->Bperm2  = Bperm2;
6770   coo->Cperm1  = Cperm1;
6771   // Allocate in preallocation. If not used, it has zero cost on host
6772   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6773   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6774   PetscCall(PetscContainerSetPointer(container, coo));
6775   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6776   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6777   PetscCall(PetscContainerDestroy(&container));
6778   PetscFunctionReturn(PETSC_SUCCESS);
6779 }
6780 
6781 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6782 {
6783   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6784   Mat                  A = mpiaij->A, B = mpiaij->B;
6785   PetscScalar         *Aa, *Ba;
6786   PetscScalar         *sendbuf, *recvbuf;
6787   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6788   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6789   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6790   const PetscCount    *Cperm1;
6791   PetscContainer       container;
6792   MatCOOStruct_MPIAIJ *coo;
6793 
6794   PetscFunctionBegin;
6795   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6797   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6798   sendbuf = coo->sendbuf;
6799   recvbuf = coo->recvbuf;
6800   Ajmap1  = coo->Ajmap1;
6801   Ajmap2  = coo->Ajmap2;
6802   Aimap2  = coo->Aimap2;
6803   Bjmap1  = coo->Bjmap1;
6804   Bjmap2  = coo->Bjmap2;
6805   Bimap2  = coo->Bimap2;
6806   Aperm1  = coo->Aperm1;
6807   Aperm2  = coo->Aperm2;
6808   Bperm1  = coo->Bperm1;
6809   Bperm2  = coo->Bperm2;
6810   Cperm1  = coo->Cperm1;
6811 
6812   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6813   PetscCall(MatSeqAIJGetArray(B, &Ba));
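
  /* For illustration (hypothetical): with Ajmap1 = [0,2,3] and Aperm1 = [5,0,2], the first
     nonzero of A accumulates v[5] + v[0] and the second accumulates v[2] */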
6814 
6815   /* Pack entries to be sent to remote */
6816   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6817 
6818   /* Send remote entries to their owner and overlap the communication with local computation */
6819   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6820   /* Add local entries to A and B */
6821   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6822     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6823     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6824     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6825   }
6826   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6827     PetscScalar sum = 0.0;
6828     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6829     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6830   }
6831   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6832 
6833   /* Add received remote entries to A and B */
6834   for (PetscCount i = 0; i < coo->Annz2; i++) {
6835     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6836   }
6837   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6838     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6839   }
6840   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6841   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6842   PetscFunctionReturn(PETSC_SUCCESS);
6843 }
6844 
6845 /*MC
6846    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6847 
6848    Options Database Keys:
6849 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6850 
6851    Level: beginner
6852 
6853    Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix
6857 
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
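
   Example (a minimal creation sketch; the sizes and preallocation values are illustrative):
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
   PetscCall(MatSetType(A, MATMPIAIJ));
   PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // about 5 diagonal-block and 2 off-diagonal-block nonzeros per row
.ve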
6860 
6861 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6862 M*/
6863 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6864 {
6865   Mat_MPIAIJ *b;
6866   PetscMPIInt size;
6867 
6868   PetscFunctionBegin;
6869   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6870 
6871   PetscCall(PetscNew(&b));
6872   B->data       = (void *)b;
6873   B->ops[0]     = MatOps_Values;
6874   B->assembled  = PETSC_FALSE;
6875   B->insertmode = NOT_SET_VALUES;
6876   b->size       = size;
6877 
6878   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6879 
6880   /* build cache for off array entries formed */
6881   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6882 
6883   b->donotstash  = PETSC_FALSE;
6884   b->colmap      = NULL;
6885   b->garray      = NULL;
6886   b->roworiented = PETSC_TRUE;
6887 
6888   /* stuff used for matrix vector multiply */
6889   b->lvec  = NULL;
6890   b->Mvctx = NULL;
6891 
6892   /* stuff for MatGetRow() */
6893   b->rowindices   = NULL;
6894   b->rowvalues    = NULL;
6895   b->getrowactive = PETSC_FALSE;
6896 
6897   /* flexible pointer used in CUSPARSE classes */
6898   b->spptr = NULL;
6899 
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6910 #if defined(PETSC_HAVE_CUDA)
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6912 #endif
6913 #if defined(PETSC_HAVE_HIP)
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6915 #endif
6916 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6918 #endif
6919 #if defined(PETSC_HAVE_MKL_SPARSE)
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6921 #endif
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6926 #if defined(PETSC_HAVE_ELEMENTAL)
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6928 #endif
6929 #if defined(PETSC_HAVE_SCALAPACK)
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6931 #endif
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6934 #if defined(PETSC_HAVE_HYPRE)
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6937 #endif
6938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6942   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6943   PetscFunctionReturn(PETSC_SUCCESS);
6944 }
6945 
6946 /*@
6947   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6948   and "off-diagonal" part of the matrix in CSR format.
6949 
6950   Collective
6951 
6952   Input Parameters:
6953 + comm - MPI communicator
6954 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
         calculated if `N` is given). For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6960 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6961 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6962 . a    - matrix values
6963 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6964 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6965 - oa   - matrix values
6966 
6967   Output Parameter:
6968 . mat - the matrix
6969 
6970   Level: advanced
6971 
6972   Notes:
6973   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6974   must free the arrays once the matrix has been destroyed and not before.
6975 
6976   The `i` and `j` indices are 0 based
6977 
6978   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6979 
6980   This sets local rows and cannot be used to set off-processor values.
6981 
6982   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6983   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
  not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6986   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6987   communication if it is known that only local entries will be set.
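
  A sketch for rank 0 of a two-rank layout where each rank owns 2 rows and 2 columns (the values
  are purely illustrative; the arrays must remain valid until the matrix is destroyed):
.vb
  PetscInt    i[]  = {0, 1, 2}, j[]  = {0, 1};  // one "diagonal" entry per row, with local column indices
  PetscScalar a[]  = {1.0, 2.0};
  PetscInt    oi[] = {0, 1, 1}, oj[] = {3};     // row 0 also couples to global column 3
  PetscScalar oa[] = {5.0};
  Mat         A;

  PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A));
.ve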
6988 
6989 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6990           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6991 @*/
6992 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6993 {
6994   Mat_MPIAIJ *maij;
6995 
6996   PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
6998   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6999   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
7000   PetscCall(MatCreate(comm, mat));
7001   PetscCall(MatSetSizes(*mat, m, n, M, N));
7002   PetscCall(MatSetType(*mat, MATMPIAIJ));
7003   maij = (Mat_MPIAIJ *)(*mat)->data;
7004 
7005   (*mat)->preallocated = PETSC_TRUE;
7006 
7007   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7008   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7009 
7010   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7011   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7012 
7013   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7014   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7015   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7016   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7017   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7018   PetscFunctionReturn(PETSC_SUCCESS);
7019 }
7020 
7021 typedef struct {
7022   Mat       *mp;    /* intermediate products */
7023   PetscBool *mptmp; /* is the intermediate product temporary ? */
7024   PetscInt   cp;    /* number of intermediate products */
7025 
7026   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7027   PetscInt    *startsj_s, *startsj_r;
7028   PetscScalar *bufa;
7029   Mat          P_oth;
7030 
7031   /* may take advantage of merging product->B */
7032   Mat Bloc; /* B-local by merging diag and off-diag */
7033 
  /* cusparse does not support splitting the symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
7037   PetscBool reusesym;
7038 
7039   /* support for COO values insertion */
7040   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7041   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7042   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7043   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7044   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7045   PetscMemType mtype;
7046 
7047   /* customization */
7048   PetscBool abmerge;
7049   PetscBool P_oth_bind;
7050 } MatMatMPIAIJBACKEND;
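
/* The backend builds C (= A*P, Pt*A, or Pt*A*P) from a list of local intermediate products
   mp[]; their values are later copied out through the own[]/off[] index maps and inserted
   into C via its COO assembly interface (see MatProductNumeric_MPIAIJBACKEND below) */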
7051 
7052 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7053 {
7054   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7055   PetscInt             i;
7056 
7057   PetscFunctionBegin;
7058   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7059   PetscCall(PetscFree(mmdata->bufa));
7060   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7061   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7062   PetscCall(MatDestroy(&mmdata->P_oth));
7063   PetscCall(MatDestroy(&mmdata->Bloc));
7064   PetscCall(PetscSFDestroy(&mmdata->sf));
7065   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7066   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7067   PetscCall(PetscFree(mmdata->own[0]));
7068   PetscCall(PetscFree(mmdata->own));
7069   PetscCall(PetscFree(mmdata->off[0]));
7070   PetscCall(PetscFree(mmdata->off));
7071   PetscCall(PetscFree(mmdata));
7072   PetscFunctionReturn(PETSC_SUCCESS);
7073 }
7074 
/* Copy n selected entries of A, with indices given in idx[], to v[].
   If idx is NULL, copy the whole data array of A to v[]
7077  */
7078 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7079 {
7080   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7081 
7082   PetscFunctionBegin;
7083   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7084   if (f) {
7085     PetscCall((*f)(A, n, idx, v));
7086   } else {
7087     const PetscScalar *vv;
7088 
7089     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7090     if (n && idx) {
7091       PetscScalar    *w  = v;
7092       const PetscInt *oi = idx;
7093       PetscInt        j;
7094 
7095       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7096     } else {
7097       PetscCall(PetscArraycpy(v, vv, n));
7098     }
7099     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7100   }
7101   PetscFunctionReturn(PETSC_SUCCESS);
7102 }
7103 
7104 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7105 {
7106   MatMatMPIAIJBACKEND *mmdata;
7107   PetscInt             i, n_d, n_o;
7108 
7109   PetscFunctionBegin;
7110   MatCheckProduct(C, 1);
7111   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7112   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7113   if (!mmdata->reusesym) { /* update temporary matrices */
7114     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7115     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7116   }
7117   mmdata->reusesym = PETSC_FALSE;
7118 
7119   for (i = 0; i < mmdata->cp; i++) {
7120     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7121     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7122   }
7123   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7124     PetscInt noff;
7125 
7126     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7127     if (mmdata->mptmp[i]) continue;
7128     if (noff) {
7129       PetscInt nown;
7130 
7131       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7132       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7133       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7134       n_o += noff;
7135       n_d += nown;
7136     } else {
7137       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7138 
7139       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7140       n_d += mm->nz;
7141     }
7142   }
7143   if (mmdata->hasoffproc) { /* offprocess insertion */
7144     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7145     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7146   }
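  /* At this point coo_v holds the n_d on-process values followed by the gathered
     off-process values (if any), matching the COO index layout built in the symbolic phase */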
7147   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7148   PetscFunctionReturn(PETSC_SUCCESS);
7149 }
7150 
7151 /* Support for Pt * A, A * P, or Pt * A * P */
7152 #define MAX_NUMBER_INTERMEDIATE 4
7153 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7154 {
7155   Mat_Product           *product = C->product;
7156   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7157   Mat_MPIAIJ            *a, *p;
7158   MatMatMPIAIJBACKEND   *mmdata;
7159   ISLocalToGlobalMapping P_oth_l2g = NULL;
7160   IS                     glob      = NULL;
7161   const char            *prefix;
7162   char                   pprefix[256];
7163   const PetscInt        *globidx, *P_oth_idx;
7164   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7165   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7166   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7167                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7168                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7169   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
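  /* For illustration (hypothetical): with rmapt[i] == 1, local row r of mp[i] contributes to
     global row r plus a base offset (e.g., the start of C's local row range); with
     rmapt[i] == 2, it contributes to global row rmapa[i][r] */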
7170 
7171   MatProductType ptype;
7172   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7173   PetscMPIInt    size;
7174 
7175   PetscFunctionBegin;
7176   MatCheckProduct(C, 1);
7177   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7178   ptype = product->type;
7179   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7180     ptype                                          = MATPRODUCT_AB;
7181     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7182   }
7183   switch (ptype) {
7184   case MATPRODUCT_AB:
7185     A          = product->A;
7186     P          = product->B;
7187     m          = A->rmap->n;
7188     n          = P->cmap->n;
7189     M          = A->rmap->N;
7190     N          = P->cmap->N;
7191     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7192     break;
7193   case MATPRODUCT_AtB:
7194     P          = product->A;
7195     A          = product->B;
7196     m          = P->cmap->n;
7197     n          = A->cmap->n;
7198     M          = P->cmap->N;
7199     N          = A->cmap->N;
7200     hasoffproc = PETSC_TRUE;
7201     break;
7202   case MATPRODUCT_PtAP:
7203     A          = product->A;
7204     P          = product->B;
7205     m          = P->cmap->n;
7206     n          = P->cmap->n;
7207     M          = P->cmap->N;
7208     N          = P->cmap->N;
7209     hasoffproc = PETSC_TRUE;
7210     break;
7211   default:
7212     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7213   }
7214   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7215   if (size == 1) hasoffproc = PETSC_FALSE;
7216 
7217   /* defaults */
7218   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7219     mp[i]    = NULL;
7220     mptmp[i] = PETSC_FALSE;
7221     rmapt[i] = -1;
7222     cmapt[i] = -1;
7223     rmapa[i] = NULL;
7224     cmapa[i] = NULL;
7225   }
7226 
7227   /* customization */
7228   PetscCall(PetscNew(&mmdata));
7229   mmdata->reusesym = product->api_user;
7230   if (ptype == MATPRODUCT_AB) {
7231     if (product->api_user) {
7232       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7233       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7234       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7235       PetscOptionsEnd();
7236     } else {
7237       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7238       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7239       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7240       PetscOptionsEnd();
7241     }
7242   } else if (ptype == MATPRODUCT_PtAP) {
7243     if (product->api_user) {
7244       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7245       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7246       PetscOptionsEnd();
7247     } else {
7248       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7249       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7250       PetscOptionsEnd();
7251     }
7252   }
7253   a = (Mat_MPIAIJ *)A->data;
7254   p = (Mat_MPIAIJ *)P->data;
7255   PetscCall(MatSetSizes(C, m, n, M, N));
7256   PetscCall(PetscLayoutSetUp(C->rmap));
7257   PetscCall(PetscLayoutSetUp(C->cmap));
7258   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7259   PetscCall(MatGetOptionsPrefix(C, &prefix));
7260 
7261   cp = 0;
7262   switch (ptype) {
7263   case MATPRODUCT_AB: /* A * P */
7264     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7265 
7266     /* A_diag * P_local (merged or not) */
7267     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7268       /* P is product->B */
7269       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7270       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7271       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7272       PetscCall(MatProductSetFill(mp[cp], product->fill));
7273       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7274       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7275       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7276       mp[cp]->product->api_user = product->api_user;
7277       PetscCall(MatProductSetFromOptions(mp[cp]));
7278       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7279       PetscCall(ISGetIndices(glob, &globidx));
7280       rmapt[cp] = 1;
7281       cmapt[cp] = 2;
7282       cmapa[cp] = globidx;
7283       mptmp[cp] = PETSC_FALSE;
7284       cp++;
7285     } else { /* A_diag * P_diag and A_diag * P_off */
7286       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7287       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7288       PetscCall(MatProductSetFill(mp[cp], product->fill));
7289       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7290       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7291       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7292       mp[cp]->product->api_user = product->api_user;
7293       PetscCall(MatProductSetFromOptions(mp[cp]));
7294       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7295       rmapt[cp] = 1;
7296       cmapt[cp] = 1;
7297       mptmp[cp] = PETSC_FALSE;
7298       cp++;
7299       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7300       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7301       PetscCall(MatProductSetFill(mp[cp], product->fill));
7302       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7303       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7304       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7305       mp[cp]->product->api_user = product->api_user;
7306       PetscCall(MatProductSetFromOptions(mp[cp]));
7307       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7308       rmapt[cp] = 1;
7309       cmapt[cp] = 2;
7310       cmapa[cp] = p->garray;
7311       mptmp[cp] = PETSC_FALSE;
7312       cp++;
7313     }
7314 
7315     /* A_off * P_other */
7316     if (mmdata->P_oth) {
7317       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7318       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7319       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7320       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7321       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7322       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7323       PetscCall(MatProductSetFill(mp[cp], product->fill));
7324       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7325       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7326       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7327       mp[cp]->product->api_user = product->api_user;
7328       PetscCall(MatProductSetFromOptions(mp[cp]));
7329       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7330       rmapt[cp] = 1;
7331       cmapt[cp] = 2;
7332       cmapa[cp] = P_oth_idx;
7333       mptmp[cp] = PETSC_FALSE;
7334       cp++;
7335     }
7336     break;
7337 
7338   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7339     /* A is product->B */
7340     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7341     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7342       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7343       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7344       PetscCall(MatProductSetFill(mp[cp], product->fill));
7345       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7346       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7347       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7348       mp[cp]->product->api_user = product->api_user;
7349       PetscCall(MatProductSetFromOptions(mp[cp]));
7350       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7351       PetscCall(ISGetIndices(glob, &globidx));
7352       rmapt[cp] = 2;
7353       rmapa[cp] = globidx;
7354       cmapt[cp] = 2;
7355       cmapa[cp] = globidx;
7356       mptmp[cp] = PETSC_FALSE;
7357       cp++;
7358     } else {
7359       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7360       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7361       PetscCall(MatProductSetFill(mp[cp], product->fill));
7362       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7363       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7364       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7365       mp[cp]->product->api_user = product->api_user;
7366       PetscCall(MatProductSetFromOptions(mp[cp]));
7367       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7368       PetscCall(ISGetIndices(glob, &globidx));
7369       rmapt[cp] = 1;
7370       cmapt[cp] = 2;
7371       cmapa[cp] = globidx;
7372       mptmp[cp] = PETSC_FALSE;
7373       cp++;
7374       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7375       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7376       PetscCall(MatProductSetFill(mp[cp], product->fill));
7377       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7378       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7379       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7380       mp[cp]->product->api_user = product->api_user;
7381       PetscCall(MatProductSetFromOptions(mp[cp]));
7382       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7383       rmapt[cp] = 2;
7384       rmapa[cp] = p->garray;
7385       cmapt[cp] = 2;
7386       cmapa[cp] = globidx;
7387       mptmp[cp] = PETSC_FALSE;
7388       cp++;
7389     }
7390     break;
7391   case MATPRODUCT_PtAP:
7392     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7393     /* P is product->B */
7394     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7395     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7396     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7397     PetscCall(MatProductSetFill(mp[cp], product->fill));
7398     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7399     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7400     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7401     mp[cp]->product->api_user = product->api_user;
7402     PetscCall(MatProductSetFromOptions(mp[cp]));
7403     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7404     PetscCall(ISGetIndices(glob, &globidx));
7405     rmapt[cp] = 2;
7406     rmapa[cp] = globidx;
7407     cmapt[cp] = 2;
7408     cmapa[cp] = globidx;
7409     mptmp[cp] = PETSC_FALSE;
7410     cp++;
7411     if (mmdata->P_oth) {
7412       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7413       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7414       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7415       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7416       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7417       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7418       PetscCall(MatProductSetFill(mp[cp], product->fill));
7419       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7420       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7421       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7422       mp[cp]->product->api_user = product->api_user;
7423       PetscCall(MatProductSetFromOptions(mp[cp]));
7424       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7425       mptmp[cp] = PETSC_TRUE;
7426       cp++;
7427       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7428       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7429       PetscCall(MatProductSetFill(mp[cp], product->fill));
7430       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7431       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7432       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7433       mp[cp]->product->api_user = product->api_user;
7434       PetscCall(MatProductSetFromOptions(mp[cp]));
7435       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7436       rmapt[cp] = 2;
7437       rmapa[cp] = globidx;
7438       cmapt[cp] = 2;
7439       cmapa[cp] = P_oth_idx;
7440       mptmp[cp] = PETSC_FALSE;
7441       cp++;
7442     }
7443     break;
7444   default:
7445     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7446   }
7447   /* sanity check */
7448   if (size > 1)
7449     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7450 
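  /*
     Legend for the row/column map types used below: rmapt[p] == 1 means the rows of mp[p] map
     consecutively onto [rstart, rend) of C, while rmapt[p] == 2 means rmapa[p] holds an explicit
     local-to-global row map. cmapt[p] == 0 means the columns of mp[p] are already in global
     numbering, 1 means they map consecutively onto the owned columns of C, and 2 means cmapa[p]
     holds an explicit local-to-global column map.
  */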
7451   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7452   for (i = 0; i < cp; i++) {
7453     mmdata->mp[i]    = mp[i];
7454     mmdata->mptmp[i] = mptmp[i];
7455   }
7456   mmdata->cp             = cp;
7457   C->product->data       = mmdata;
7458   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7459   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7460 
7461   /* memory type */
7462   mmdata->mtype = PETSC_MEMTYPE_HOST;
7463   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7464   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7465   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7466   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7467   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7468   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7469 
7470   /* prepare coo coordinates for values insertion */
7471 
7472   /* count total nonzeros of those intermediate seqaij Mats
7473     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7474     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs
7475     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7476   */
7477   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7478     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7479     if (mptmp[cp]) continue;
7480     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7481       const PetscInt *rmap = rmapa[cp];
7482       const PetscInt  mr   = mp[cp]->rmap->n;
7483       const PetscInt  rs   = C->rmap->rstart;
7484       const PetscInt  re   = C->rmap->rend;
7485       const PetscInt *ii   = mm->i;
7486       for (i = 0; i < mr; i++) {
7487         const PetscInt gr = rmap[i];
7488         const PetscInt nz = ii[i + 1] - ii[i];
7489         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7490         else ncoo_oown += nz;                  /* this row is local */
7491       }
7492     } else ncoo_d += mm->nz;
7493   }
7494 
7495   /*
7496     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7497 
7498     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this proc by other procs.
7499 
7500     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7501 
7502     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs
7503     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7504     Thus, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7505 
7506     coo_i/j/v[]: arrays of length ncoo holding the row/col/val of nonzeros belonging to this proc.
7507     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this proc will receive.
7508   */
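  /*
     A small illustrative example (the numbers are made up): suppose this rank owns global rows [2,4) of C
     and there is a single non-temporary product mp[0] with rmapt[0] == 2, rmapa[0] = {1, 2}, and two
     nonzeros per row stored consecutively in mp[0]'s aij arrays. Local row 0 maps to global row 1, which
     is offproc, so its nonzero positions 0 and 1 are recorded in off[0][] and ncoo_o = 2; local row 1 maps
     to owned global row 2, so positions 2 and 3 are recorded in own[0][] and ncoo_oown = 2. off[1] and
     own[1] then point one past the end of their segments, giving the CSR-like layout mentioned below.
  */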
7509   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7510   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7511 
7512   /* gather (i,j) of nonzeros inserted by remote procs */
7513   if (hasoffproc) {
7514     PetscSF  msf;
7515     PetscInt ncoo2, *coo_i2, *coo_j2;
7516 
7517     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7518     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7519     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7520 
7521     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7522       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7523       PetscInt   *idxoff = mmdata->off[cp];
7524       PetscInt   *idxown = mmdata->own[cp];
7525       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7526         const PetscInt *rmap = rmapa[cp];
7527         const PetscInt *cmap = cmapa[cp];
7528         const PetscInt *ii   = mm->i;
7529         PetscInt       *coi  = coo_i + ncoo_o;
7530         PetscInt       *coj  = coo_j + ncoo_o;
7531         const PetscInt  mr   = mp[cp]->rmap->n;
7532         const PetscInt  rs   = C->rmap->rstart;
7533         const PetscInt  re   = C->rmap->rend;
7534         const PetscInt  cs   = C->cmap->rstart;
7535         for (i = 0; i < mr; i++) {
7536           const PetscInt *jj = mm->j + ii[i];
7537           const PetscInt  gr = rmap[i];
7538           const PetscInt  nz = ii[i + 1] - ii[i];
7539           if (gr < rs || gr >= re) { /* this is an offproc row */
7540             for (j = ii[i]; j < ii[i + 1]; j++) {
7541               *coi++    = gr;
7542               *idxoff++ = j;
7543             }
7544             if (!cmapt[cp]) { /* type-0, already global */
7545               for (j = 0; j < nz; j++) *coj++ = jj[j];
7546             } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7547               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7548             } else { /* type-2, local to global for sparse columns */
7549               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7550             }
7551             ncoo_o += nz;
7552           } else { /* this is a local row */
7553             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7554           }
7555         }
7556       }
7557       mmdata->off[cp + 1] = idxoff;
7558       mmdata->own[cp + 1] = idxown;
7559     }
7560 
7561     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7562     PetscInt incoo_o;
7563     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7564     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7565     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7566     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7567     ncoo = ncoo_d + ncoo_oown + ncoo2;
7568     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7569     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at the back */
7570     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7571     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7572     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7573     PetscCall(PetscFree2(coo_i, coo_j));
7574     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7575     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7576     coo_i = coo_i2;
7577     coo_j = coo_j2;
7578   } else { /* no offproc values insertion */
7579     ncoo = ncoo_d;
7580     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7581 
7582     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7583     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7584     PetscCall(PetscSFSetUp(mmdata->sf));
7585   }
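  /*
     Note on the SF built in the hasoffproc branch above: PetscSFSetGraphLayout() roots the ncoo_o leaves
     (the offproc entries, keyed by their global row in coo_i[]) at the rank owning that row of C, and the
     multi-SF from PetscSFGetMultiSF() provides one root slot per incoming leaf. Gathering through it thus
     concatenates, per receiving rank, the (i,j) contributed by all senders, which is why the remote
     entries land contiguously after the ncoo_d + ncoo_oown local ones in coo_i2/coo_j2.
  */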
7586   mmdata->hasoffproc = hasoffproc;
7587 
7588   /* gather (i,j) of nonzeros inserted locally */
7589   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7590     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7591     PetscInt       *coi  = coo_i + ncoo_d;
7592     PetscInt       *coj  = coo_j + ncoo_d;
7593     const PetscInt *jj   = mm->j;
7594     const PetscInt *ii   = mm->i;
7595     const PetscInt *cmap = cmapa[cp];
7596     const PetscInt *rmap = rmapa[cp];
7597     const PetscInt  mr   = mp[cp]->rmap->n;
7598     const PetscInt  rs   = C->rmap->rstart;
7599     const PetscInt  re   = C->rmap->rend;
7600     const PetscInt  cs   = C->cmap->rstart;
7601 
7602     if (mptmp[cp]) continue;
7603     if (rmapt[cp] == 1) { /* consecutive rows */
7604       /* fill coo_i */
7605       for (i = 0; i < mr; i++) {
7606         const PetscInt gr = i + rs;
7607         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7608       }
7609       /* fill coo_j */
7610       if (!cmapt[cp]) { /* type-0, already global */
7611         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7612       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7613         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7614       } else {                                            /* type-2, local to global for sparse columns */
7615         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7616       }
7617       ncoo_d += mm->nz;
7618     } else if (rmapt[cp] == 2) { /* sparse rows */
7619       for (i = 0; i < mr; i++) {
7620         const PetscInt *jj = mm->j + ii[i];
7621         const PetscInt  gr = rmap[i];
7622         const PetscInt  nz = ii[i + 1] - ii[i];
7623         if (gr >= rs && gr < re) { /* local rows */
7624           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7625           if (!cmapt[cp]) { /* type-0, already global */
7626             for (j = 0; j < nz; j++) *coj++ = jj[j];
7627           } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
7628             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7629           } else { /* type-2, local to global for sparse columns */
7630             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7631           }
7632           ncoo_d += nz;
7633         }
7634       }
7635     }
7636   }
7637   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7638   PetscCall(ISDestroy(&glob));
7639   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7640   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7641   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7642   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7643 
7644   /* preallocate with COO data */
7645   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7646   PetscCall(PetscFree2(coo_i, coo_j));
7647   PetscFunctionReturn(PETSC_SUCCESS);
7648 }
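
/*
   For reference, a minimal sketch (A and P are placeholders for assembled MATMPIAIJ matrices on a device
   backend such as MATMPIAIJCUSPARSE) of how the symbolic/numeric split above is driven through the public
   MatProduct API:

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));    // product container for C = A*P
     PetscCall(MatProductSetType(C, MATPRODUCT_AB)); // or MATPRODUCT_AtB, MATPRODUCT_PtAP
     PetscCall(MatProductSetFromOptions(C));         // may select this backend, see below
     PetscCall(MatProductSymbolic(C));               // runs MatProductSymbolic_MPIAIJBACKEND
     PetscCall(MatProductNumeric(C));                // runs MatProductNumeric_MPIAIJBACKEND
     PetscCall(MatDestroy(&C));
*/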
7649 
7650 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7651 {
7652   Mat_Product *product = mat->product;
7653 #if defined(PETSC_HAVE_DEVICE)
7654   PetscBool match  = PETSC_FALSE;
7655   PetscBool usecpu = PETSC_FALSE;
7656 #else
7657   PetscBool match = PETSC_TRUE;
7658 #endif
7659 
7660   PetscFunctionBegin;
7661   MatCheckProduct(mat, 1);
7662 #if defined(PETSC_HAVE_DEVICE)
7663   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7664   if (match) { /* we can always fall back to the CPU if requested */
7665     switch (product->type) {
7666     case MATPRODUCT_AB:
7667       if (product->api_user) {
7668         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7669         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7670         PetscOptionsEnd();
7671       } else {
7672         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7673         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7674         PetscOptionsEnd();
7675       }
7676       break;
7677     case MATPRODUCT_AtB:
7678       if (product->api_user) {
7679         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7680         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7681         PetscOptionsEnd();
7682       } else {
7683         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7684         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7685         PetscOptionsEnd();
7686       }
7687       break;
7688     case MATPRODUCT_PtAP:
7689       if (product->api_user) {
7690         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7691         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7692         PetscOptionsEnd();
7693       } else {
7694         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7695         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7696         PetscOptionsEnd();
7697       }
7698       break;
7699     default:
7700       break;
7701     }
7702     match = (PetscBool)!usecpu;
7703   }
7704 #endif
7705   if (match) {
7706     switch (product->type) {
7707     case MATPRODUCT_AB:
7708     case MATPRODUCT_AtB:
7709     case MATPRODUCT_PtAP:
7710       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7711       break;
7712     default:
7713       break;
7714     }
7715   }
7716   /* fall back to MPIAIJ ops */
7717   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7718   PetscFunctionReturn(PETSC_SUCCESS);
7719 }
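
/*
   Example (illustrative; the program name is a placeholder): on device-enabled builds the CPU fallback
   above can be requested from the command line, e.g.

     ./app -matmatmult_backend_cpu             (when using the MatMatMult() API)
     ./app -mat_product_algorithm_backend_cpu  (when using the MatProduct API)

   in which case the switch above leaves mat->ops->productsymbolic unset and the plain MPIAIJ
   implementation is selected by MatProductSetFromOptions_MPIAIJ().
*/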
7720 
7721 /*
7722    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7723 
7724    n - the number of block indices in cc[]
7725    cc - the block indices (must be large enough to contain the indices)
7726 */
7727 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7728 {
7729   PetscInt        cnt = -1, nidx, j;
7730   const PetscInt *idx;
7731 
7732   PetscFunctionBegin;
7733   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7734   if (nidx) {
7735     cnt     = 0;
7736     cc[cnt] = idx[0] / bs;
7737     for (j = 1; j < nidx; j++) {
7738       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7739     }
7740   }
7741   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7742   *n = cnt + 1;
7743   PetscFunctionReturn(PETSC_SUCCESS);
7744 }
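
/*
   Example (illustrative): with bs = 2 and a row whose (sorted) column indices are {0, 1, 4, 5, 8},
   MatCollapseRow() returns n = 3 and cc = {0, 2, 4}, i.e. one entry per block column touched by the
   row. The scan relies on MatGetRow() returning column indices in increasing order.
*/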
7745 
7746 /*
7747     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7748 
7749     ncollapsed - the number of block indices
7750     collapsed - the block indices (must be large enough to contain the indices)
7751 */
7752 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7753 {
7754   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7755 
7756   PetscFunctionBegin;
7757   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7758   for (i = start + 1; i < start + bs; i++) {
7759     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7760     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7761     cprevtmp = cprev;
7762     cprev    = merged;
7763     merged   = cprevtmp;
7764   }
7765   *ncollapsed = nprev;
7766   if (collapsed) *collapsed = cprev;
7767   PetscFunctionReturn(PETSC_SUCCESS);
7768 }
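
/*
   Example (illustrative): with bs = 2 starting at row 0, if row 0 collapses to {0, 2} and row 1
   collapses to {2, 3}, the merged result is *ncollapsed = 3 with *collapsed = {0, 2, 3}. The
   workspaces w0/w1/w2 are caller-provided, and *collapsed points into one of them (because of the
   pointer swapping above), so it must not be freed separately.
*/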
7769 
7770 /*
7771  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7772 
7773  Input Parameters:
7774 + Amat - matrix
7775 . symmetrize - make the result symmetric
7776 - scale - scale with diagonal
7777 
7778  Output Parameter:
7779  . a_Gmat - output scalar graph (all entries >= 0)
7780 
7781 */
7782 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7783 {
7784   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7785   MPI_Comm  comm;
7786   Mat       Gmat;
7787   PetscBool ismpiaij, isseqaij;
7788   Mat       a, b, c;
7789   MatType   jtype;
7790 
7791   PetscFunctionBegin;
7792   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7793   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7794   PetscCall(MatGetSize(Amat, &MM, &NN));
7795   PetscCall(MatGetBlockSize(Amat, &bs));
7796   nloc = (Iend - Istart) / bs;
7797 
7798   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7799   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7800   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7801 
7802   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7803   /* One solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class could provide a fast
7804      implementation */
7805   if (bs > 1) {
7806     PetscCall(MatGetType(Amat, &jtype));
7807     PetscCall(MatCreate(comm, &Gmat));
7808     PetscCall(MatSetType(Gmat, jtype));
7809     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7810     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7811     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7812       PetscInt  *d_nnz, *o_nnz;
7813       MatScalar *aa, val, *AA;
7814       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7815 
7816       if (isseqaij) {
7817         a = Amat;
7818         b = NULL;
7819       } else {
7820         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7821         a             = d->A;
7822         b             = d->B;
7823       }
7824       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7825       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7826       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7827         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7828         const PetscInt *cols1, *cols2;
7829 
7830         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7831           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7832           nnz[brow / bs] = nc2 / bs;
7833           if (nc2 % bs) ok = 0;
7834           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7835           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7836             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7837             if (nc1 != nc2) ok = 0;
7838             else {
7839               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7840                 if (cols1[jj] != cols2[jj]) ok = 0;
7841                 if (cols1[jj] % bs != jj % bs) ok = 0;
7842               }
7843             }
7844             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7845           }
7846           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7847           if (!ok) {
7848             PetscCall(PetscFree2(d_nnz, o_nnz));
7849             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7850             goto old_bs;
7851           }
7852         }
7853       }
7854       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7855       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7856       PetscCall(PetscFree2(d_nnz, o_nnz));
7857       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7858       // diag
7859       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7860         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7861 
7862         ai = aseq->i;
7863         n  = ai[brow + 1] - ai[brow];
7864         aj = aseq->j + ai[brow];
7865         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7866           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7867           val        = 0;
7868           if (index_size == 0) {
7869             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7870               aa = aseq->a + ai[brow + ii] + k;
7871               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7872                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7873               }
7874             }
7875           } else {                                            // use (index,index) value if provided
7876             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7877               PetscInt ii = index[iii];
7878               aa          = aseq->a + ai[brow + ii] + k;
7879               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7880                 PetscInt jj = index[jjj];
7881                 val += PetscAbs(PetscRealPart(aa[jj]));
7882               }
7883             }
7884           }
7885           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7886           AA[k / bs] = val;
7887         }
7888         grow = Istart / bs + brow / bs;
7889         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7890       }
7891       // off-diag
7892       if (ismpiaij) {
7893         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7894         const PetscScalar *vals;
7895         const PetscInt    *cols, *garray = aij->garray;
7896 
7897         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7898         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7899           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7900           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7901             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7902             AA[k / bs] = 0;
7903             AJ[cidx]   = garray[cols[k]] / bs;
7904           }
7905           nc = ncols / bs;
7906           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7907           if (index_size == 0) {
7908             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7909               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7910               for (PetscInt k = 0; k < ncols; k += bs) {
7911                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7912                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7913                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7914                 }
7915               }
7916               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7917             }
7918           } else {                                            // use (index,index) value if provided
7919             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7920               PetscInt ii = index[iii];
7921               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7922               for (PetscInt k = 0; k < ncols; k += bs) {
7923                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7924                   PetscInt jj = index[jjj];
7925                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7926                 }
7927               }
7928               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7929             }
7930           }
7931           grow = Istart / bs + brow / bs;
7932           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7933         }
7934       }
7935       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7936       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7937       PetscCall(PetscFree2(AA, AJ));
7938     } else {
7939       const PetscScalar *vals;
7940       const PetscInt    *idx;
7941       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7942     old_bs:
7943       /*
7944        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7945        */
7946       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7947       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7948       if (isseqaij) {
7949         PetscInt max_d_nnz;
7950 
7951         /*
7952          Determine exact preallocation count for (sequential) scalar matrix
7953          */
7954         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7955         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7956         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7957         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7958         PetscCall(PetscFree3(w0, w1, w2));
7959       } else if (ismpiaij) {
7960         Mat             Daij, Oaij;
7961         const PetscInt *garray;
7962         PetscInt        max_d_nnz;
7963 
7964         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7965         /*
7966          Determine exact preallocation count for diagonal block portion of scalar matrix
7967          */
7968         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7969         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7970         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7971         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7972         PetscCall(PetscFree3(w0, w1, w2));
7973         /*
7974          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7975          */
7976         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7977           o_nnz[jj] = 0;
7978           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7979             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7980             o_nnz[jj] += ncols;
7981             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7982           }
7983           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7984         }
7985       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7986       /* get scalar copy (norms) of matrix */
7987       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7988       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7989       PetscCall(PetscFree2(d_nnz, o_nnz));
7990       for (Ii = Istart; Ii < Iend; Ii++) {
7991         PetscInt dest_row = Ii / bs;
7992 
7993         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7994         for (jj = 0; jj < ncols; jj++) {
7995           PetscInt    dest_col = idx[jj] / bs;
7996           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7997 
7998           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7999         }
8000         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8001       }
8002       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8003       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8004     }
8005   } else {
8006     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8007     else {
8008       Gmat = Amat;
8009       PetscCall(PetscObjectReference((PetscObject)Gmat));
8010     }
8011     if (isseqaij) {
8012       a = Gmat;
8013       b = NULL;
8014     } else {
8015       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8016       a             = d->A;
8017       b             = d->B;
8018     }
8019     if (filter >= 0 || scale) {
8020       /* take absolute value of each entry */
8021       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8022         MatInfo      info;
8023         PetscScalar *avals;
8024 
8025         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8026         PetscCall(MatSeqAIJGetArray(c, &avals));
8027         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8028         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8029       }
8030     }
8031   }
8032   if (symmetrize) {
8033     PetscBool isset, issym;
8034 
8035     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8036     if (!isset || !issym) {
8037       Mat matTrans;
8038 
8039       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8040       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8041       PetscCall(MatDestroy(&matTrans));
8042     }
8043     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8044   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8045   if (scale) {
8046     /* symmetrically scale Gmat so that all diagonal values are +1 or -1 */
8047     Vec diag;
8048 
8049     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8050     PetscCall(MatGetDiagonal(Gmat, diag));
8051     PetscCall(VecReciprocal(diag));
8052     PetscCall(VecSqrtAbs(diag));
8053     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8054     PetscCall(VecDestroy(&diag));
8055   }
8056   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8057   if (filter >= 0) {
8058     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8059     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8060   }
8061   *a_Gmat = Gmat;
8062   PetscFunctionReturn(PETSC_SUCCESS);
8063 }
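
/*
   For reference, a minimal usage sketch (Amat is a placeholder for an assembled AIJ matrix): collapse a
   bs = 3 matrix to a scalar connection graph, symmetrized and diagonally scaled, dropping entries whose
   scaled absolute value falls below 0.01 and using all rows/columns of each block:

     Mat Gmat;
     PetscCall(MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &Gmat));
     // ... use Gmat, e.g. hand it to a coarsening algorithm ...
     PetscCall(MatDestroy(&Gmat));
*/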
8064 
8065 /*
8066     Special version for direct calls from Fortran
8067 */
8068 
8069 /* Change these macros so they can be used in a void function */
8070 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8071 #undef PetscCall
8072 #define PetscCall(...) \
8073   do { \
8074     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8075     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8076       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8077       return; \
8078     } \
8079   } while (0)
8080 
8081 #undef SETERRQ
8082 #define SETERRQ(comm, ierr, ...) \
8083   do { \
8084     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8085     return; \
8086   } while (0)
8087 
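/*
   Illustrative expansion: with the redefinition above,

     PetscCall(MatSeqAIJGetArray(A, &aa));

   inside the void Fortran stub below behaves like

     PetscErrorCode ierr_msv_mpiaij = MatSeqAIJGetArray(A, &aa);
     if (PetscUnlikely(ierr_msv_mpiaij)) {
       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " ");
       return;
     }

   so the Fortran caller receives any error code through its final ierr argument.
*/
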
8088 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8089   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8090 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8091   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8092 #else
8093 #endif
8094 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8095 {
8096   Mat         mat = *mmat;
8097   PetscInt    m = *mm, n = *mn;
8098   InsertMode  addv = *maddv;
8099   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8100   PetscScalar value;
8101 
8102   MatCheckPreallocated(mat, 1);
8103   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8104   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8105   {
8106     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8107     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8108     PetscBool roworiented = aij->roworiented;
8109 
8110     /* Some Variables required in the macro */
8111     Mat         A     = aij->A;
8112     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8113     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8114     MatScalar  *aa;
8115     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8116     Mat         B                 = aij->B;
8117     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8118     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8119     MatScalar  *ba;
8120     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8121      * cannot use "#if defined" inside a macro. */
8122     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8123 
8124     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8125     PetscInt   nonew = a->nonew;
8126     MatScalar *ap1, *ap2;
8127 
8128     PetscFunctionBegin;
8129     PetscCall(MatSeqAIJGetArray(A, &aa));
8130     PetscCall(MatSeqAIJGetArray(B, &ba));
8131     for (i = 0; i < m; i++) {
8132       if (im[i] < 0) continue;
8133       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8134       if (im[i] >= rstart && im[i] < rend) {
8135         row      = im[i] - rstart;
8136         lastcol1 = -1;
8137         rp1      = aj + ai[row];
8138         ap1      = aa + ai[row];
8139         rmax1    = aimax[row];
8140         nrow1    = ailen[row];
8141         low1     = 0;
8142         high1    = nrow1;
8143         lastcol2 = -1;
8144         rp2      = bj + bi[row];
8145         ap2      = ba + bi[row];
8146         rmax2    = bimax[row];
8147         nrow2    = bilen[row];
8148         low2     = 0;
8149         high2    = nrow2;
8150 
8151         for (j = 0; j < n; j++) {
8152           if (roworiented) value = v[i * n + j];
8153           else value = v[i + j * m];
8154           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8155           if (in[j] >= cstart && in[j] < cend) {
8156             col = in[j] - cstart;
8157             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8158           } else if (in[j] < 0) continue;
8159           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8160             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8161           } else {
8162             if (mat->was_assembled) {
8163               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8164 #if defined(PETSC_USE_CTABLE)
8165               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8166               col--;
8167 #else
8168               col = aij->colmap[in[j]] - 1;
8169 #endif
8170               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8171                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8172                 col = in[j];
8173                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8174                 B        = aij->B;
8175                 b        = (Mat_SeqAIJ *)B->data;
8176                 bimax    = b->imax;
8177                 bi       = b->i;
8178                 bilen    = b->ilen;
8179                 bj       = b->j;
8180                 rp2      = bj + bi[row];
8181                 ap2      = ba + bi[row];
8182                 rmax2    = bimax[row];
8183                 nrow2    = bilen[row];
8184                 low2     = 0;
8185                 high2    = nrow2;
8186                 bm       = aij->B->rmap->n;
8187                 ba       = b->a;
8188                 inserted = PETSC_FALSE;
8189               }
8190             } else col = in[j];
8191             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8192           }
8193         }
8194       } else if (!aij->donotstash) {
8195         if (roworiented) {
8196           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8197         } else {
8198           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8199         }
8200       }
8201     }
8202     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8203     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8204   }
8205   PetscFunctionReturnVoid();
8206 }
8207 
8208 /* Undefining these here since they were redefined from their original definition above! No
8209  * other PETSc functions should be defined past this point, as it is impossible to recover the
8210  * original definitions */
8211 #undef PetscCall
8212 #undef SETERRQ
8213