xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 258f4e960de9459edc774f7eab4bc0a4819f7230)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
15   PetscCall(MatStashDestroy_Private(&mat->stash));
16   PetscCall(VecDestroy(&aij->diag));
17   PetscCall(MatDestroy(&aij->A));
18   PetscCall(MatDestroy(&aij->B));
19 #if defined(PETSC_USE_CTABLE)
20   PetscCall(PetscHMapIDestroy(&aij->colmap));
21 #else
22   PetscCall(PetscFree(aij->colmap));
23 #endif
24   PetscCall(PetscFree(aij->garray));
25   PetscCall(VecDestroy(&aij->lvec));
26   PetscCall(VecScatterDestroy(&aij->Mvctx));
27   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
28   PetscCall(PetscFree(aij->ld));
29 
30   PetscCall(PetscFree(mat->data));
31 
32   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
33   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
34 
35   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
36   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
37   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
38   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
39   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
40   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
45 #if defined(PETSC_HAVE_CUDA)
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
47 #endif
48 #if defined(PETSC_HAVE_HIP)
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
50 #endif
51 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
52   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
53 #endif
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
55 #if defined(PETSC_HAVE_ELEMENTAL)
56   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
57 #endif
58 #if defined(PETSC_HAVE_SCALAPACK)
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
60 #endif
61 #if defined(PETSC_HAVE_HYPRE)
62   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
63   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
64 #endif
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
71 #if defined(PETSC_HAVE_MKL_SPARSE)
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
73 #endif
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
76   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
79   PetscFunctionReturn(PETSC_SUCCESS);
80 }
81 
82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
83 #define TYPE AIJ
84 #define TYPE_AIJ
85 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
86 #undef TYPE
87 #undef TYPE_AIJ
88 
89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
90 {
91   Mat B;
92 
93   PetscFunctionBegin;
94   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
95   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
96   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
97   PetscCall(MatDestroy(&B));
98   PetscFunctionReturn(PETSC_SUCCESS);
99 }
100 
101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
102 {
103   Mat B;
104 
105   PetscFunctionBegin;
106   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
107   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
108   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
109   PetscFunctionReturn(PETSC_SUCCESS);
110 }
111 
112 /*MC
113    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
114 
115    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
116    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
117   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
118   for communicators controlling multiple processes.  It is recommended that you call both of
119   the above preallocation routines for simplicity.
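
   For example, a minimal sketch (placeholder names `comm`, `m`, `n`, `M`, `N`, `nz`, `d_nz`, `o_nz`; not taken from this file)
   that preallocates for both the single-process and multi-process cases:
.vb
   MatCreate(comm, &A);
   MatSetSizes(A, m, n, M, N);
   MatSetType(A, MATAIJ);
   MatSeqAIJSetPreallocation(A, nz, NULL);               // used when comm has a single process
   MatMPIAIJSetPreallocation(A, d_nz, NULL, o_nz, NULL); // used otherwise
.ve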
120 
121    Options Database Key:
122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
123 
124   Level: beginner
125 
126   Developer Note:
127   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also automatically
128   switches over to using inodes when enough of them exist.
129 
130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
131 M*/
132 
133 /*MC
134    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
135 
136    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
137    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
138    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
139   for communicators controlling multiple processes.  It is recommended that you call both of
140   the above preallocation routines for simplicity.
141 
142    Options Database Key:
143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
144 
145   Level: beginner
146 
147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
148 M*/
149 
150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
151 {
152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
153 
154   PetscFunctionBegin;
155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
156   A->boundtocpu = flg;
157 #endif
158   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
159   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
160 
161   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
162    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
163    * to differ from the parent matrix. */
164   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
165   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
166   PetscFunctionReturn(PETSC_SUCCESS);
167 }
168 
169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
170 {
171   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
172 
173   PetscFunctionBegin;
174   if (mat->A) {
175     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
176     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
177   }
178   PetscFunctionReturn(PETSC_SUCCESS);
179 }
180 
181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
182 {
183   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
184   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
185   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
186   const PetscInt  *ia, *ib;
187   const MatScalar *aa, *bb, *aav, *bav;
188   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
189   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
190 
191   PetscFunctionBegin;
192   *keptrows = NULL;
193 
194   ia = a->i;
195   ib = b->i;
196   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
197   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
198   for (i = 0; i < m; i++) {
199     na = ia[i + 1] - ia[i];
200     nb = ib[i + 1] - ib[i];
201     if (!na && !nb) {
202       cnt++;
203       goto ok1;
204     }
205     aa = aav + ia[i];
206     for (j = 0; j < na; j++) {
207       if (aa[j] != 0.0) goto ok1;
208     }
209     bb = PetscSafePointerPlusOffset(bav, ib[i]);
210     for (j = 0; j < nb; j++) {
211       if (bb[j] != 0.0) goto ok1;
212     }
213     cnt++;
214   ok1:;
215   }
216   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
217   if (!n0rows) {
218     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
219     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
220     PetscFunctionReturn(PETSC_SUCCESS);
221   }
222   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
223   cnt = 0;
224   for (i = 0; i < m; i++) {
225     na = ia[i + 1] - ia[i];
226     nb = ib[i + 1] - ib[i];
227     if (!na && !nb) continue;
228     aa = aav + ia[i];
229     for (j = 0; j < na; j++) {
230       if (aa[j] != 0.0) {
231         rows[cnt++] = rstart + i;
232         goto ok2;
233       }
234     }
235     bb = PetscSafePointerPlusOffset(bav, ib[i]);
236     for (j = 0; j < nb; j++) {
237       if (bb[j] != 0.0) {
238         rows[cnt++] = rstart + i;
239         goto ok2;
240       }
241     }
242   ok2:;
243   }
244   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
245   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
246   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
247   PetscFunctionReturn(PETSC_SUCCESS);
248 }
249 
250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
251 {
252   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
253   PetscBool   cong;
254 
255   PetscFunctionBegin;
256   PetscCall(MatHasCongruentLayouts(Y, &cong));
257   if (Y->assembled && cong) {
258     PetscCall(MatDiagonalSet(aij->A, D, is));
259   } else {
260     PetscCall(MatDiagonalSet_Default(Y, D, is));
261   }
262   PetscFunctionReturn(PETSC_SUCCESS);
263 }
264 
265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
266 {
267   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
268   PetscInt    i, rstart, nrows, *rows;
269 
270   PetscFunctionBegin;
271   *zrows = NULL;
272   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
273   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
274   for (i = 0; i < nrows; i++) rows[i] += rstart;
275   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
276   PetscFunctionReturn(PETSC_SUCCESS);
277 }
278 
279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
280 {
281   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
282   PetscInt           i, m, n, *garray = aij->garray;
283   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
284   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
285   PetscReal         *work;
286   const PetscScalar *dummy;
287   PetscMPIInt        in;
288 
289   PetscFunctionBegin;
290   PetscCall(MatGetSize(A, &m, &n));
291   PetscCall(PetscCalloc1(n, &work));
292   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
293   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
294   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
295   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
296   if (type == NORM_2) {
297     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
298     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
299   } else if (type == NORM_1) {
300     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
301     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
302   } else if (type == NORM_INFINITY) {
303     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
304     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
305   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
306     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
307     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
308   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
309     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
310     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
311   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
312   PetscCall(PetscMPIIntCast(n, &in));
313   if (type == NORM_INFINITY) {
314     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
315   } else {
316     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
317   }
318   PetscCall(PetscFree(work));
319   if (type == NORM_2) {
320     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
321   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
322     for (i = 0; i < n; i++) reductions[i] /= m;
323   }
324   PetscFunctionReturn(PETSC_SUCCESS);
325 }
326 
327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
328 {
329   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
330   IS              sis, gis;
331   const PetscInt *isis, *igis;
332   PetscInt        n, *iis, nsis, ngis, rstart, i;
333 
334   PetscFunctionBegin;
335   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
336   PetscCall(MatFindNonzeroRows(a->B, &gis));
337   PetscCall(ISGetSize(gis, &ngis));
338   PetscCall(ISGetSize(sis, &nsis));
339   PetscCall(ISGetIndices(sis, &isis));
340   PetscCall(ISGetIndices(gis, &igis));
341 
342   PetscCall(PetscMalloc1(ngis + nsis, &iis));
343   PetscCall(PetscArraycpy(iis, igis, ngis));
344   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
345   n = ngis + nsis;
346   PetscCall(PetscSortRemoveDupsInt(&n, iis));
347   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
348   for (i = 0; i < n; i++) iis[i] += rstart;
349   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
350 
351   PetscCall(ISRestoreIndices(sis, &isis));
352   PetscCall(ISRestoreIndices(gis, &igis));
353   PetscCall(ISDestroy(&sis));
354   PetscCall(ISDestroy(&gis));
355   PetscFunctionReturn(PETSC_SUCCESS);
356 }
357 
358 /*
359   Local utility routine that creates a mapping from the global column
360 number to the local number in the off-diagonal part of the local
361 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
362 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
363 stores an integer array of length equal to the global number of columns) but is fast to access.
364 */
365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
366 {
367   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
368   PetscInt    n   = aij->B->cmap->n, i;
369 
370   PetscFunctionBegin;
371   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
372 #if defined(PETSC_USE_CTABLE)
373   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
374   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
375 #else
376   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
377   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
378 #endif
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
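
/*
   Sketch (mirroring the lookups in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below) of how the colmap built
   above is queried; `gcol` stands for a global column index. Values are stored as local index + 1 so that 0 can
   mean "column not present in the off-diagonal part"; with PETSC_USE_CTABLE the hash key is gcol + 1.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;                        // lcol < 0  =>  gcol is not (yet) a column of the off-diagonal block B
   #else
     lcol = aij->colmap[gcol] - 1;  // lcol < 0  =>  gcol is not (yet) a column of the off-diagonal block B
   #endif
*/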
381 
382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
383   do { \
384     if (col <= lastcol1) low1 = 0; \
385     else high1 = nrow1; \
386     lastcol1 = col; \
387     while (high1 - low1 > 5) { \
388       t = (low1 + high1) / 2; \
389       if (rp1[t] > col) high1 = t; \
390       else low1 = t; \
391     } \
392     for (_i = low1; _i < high1; _i++) { \
393       if (rp1[_i] > col) break; \
394       if (rp1[_i] == col) { \
395         if (addv == ADD_VALUES) { \
396           ap1[_i] += value; \
397           /* Not sure whether the PetscLogFlops() call will slow down the code */ \
398           (void)PetscLogFlops(1.0); \
399         } else ap1[_i] = value; \
400         goto a_noinsert; \
401       } \
402     } \
403     if (value == 0.0 && ignorezeroentries && row != col) { \
404       low1  = 0; \
405       high1 = nrow1; \
406       goto a_noinsert; \
407     } \
408     if (nonew == 1) { \
409       low1  = 0; \
410       high1 = nrow1; \
411       goto a_noinsert; \
412     } \
413     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
414     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
415     N = nrow1++ - 1; \
416     a->nz++; \
417     high1++; \
418     /* shift up all the later entries in this row */ \
419     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
420     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
421     rp1[_i] = col; \
422     ap1[_i] = value; \
423   a_noinsert:; \
424     ailen[row] = nrow1; \
425   } while (0)
426 
427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
428   do { \
429     if (col <= lastcol2) low2 = 0; \
430     else high2 = nrow2; \
431     lastcol2 = col; \
432     while (high2 - low2 > 5) { \
433       t = (low2 + high2) / 2; \
434       if (rp2[t] > col) high2 = t; \
435       else low2 = t; \
436     } \
437     for (_i = low2; _i < high2; _i++) { \
438       if (rp2[_i] > col) break; \
439       if (rp2[_i] == col) { \
440         if (addv == ADD_VALUES) { \
441           ap2[_i] += value; \
442           (void)PetscLogFlops(1.0); \
443         } else ap2[_i] = value; \
444         goto b_noinsert; \
445       } \
446     } \
447     if (value == 0.0 && ignorezeroentries) { \
448       low2  = 0; \
449       high2 = nrow2; \
450       goto b_noinsert; \
451     } \
452     if (nonew == 1) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
458     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
459     N = nrow2++ - 1; \
460     b->nz++; \
461     high2++; \
462     /* shift up all the later entries in this row */ \
463     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
464     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
465     rp2[_i] = col; \
466     ap2[_i] = value; \
467   b_noinsert:; \
468     bilen[row] = nrow2; \
469   } while (0)
470 
471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
472 {
473   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
474   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
475   PetscInt     l, *garray                         = mat->garray, diag;
476   PetscScalar *aa, *ba;
477 
478   PetscFunctionBegin;
479   /* code only works for square matrices A */
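  /* v is expected to hold the entire row in global column order: first the off-diagonal (B) entries whose global
     column lies to the left of the diagonal block, then the diagonal-block (A) entries, then the remaining
     off-diagonal entries to the right of the diagonal block */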
480 
481   /* find size of row to the left of the diagonal part */
482   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
483   row = row - diag;
484   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
485     if (garray[b->j[b->i[row] + l]] > diag) break;
486   }
487   if (l) {
488     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
489     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
490     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
491   }
492 
493   /* diagonal part */
494   if (a->i[row + 1] - a->i[row]) {
495     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
496     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
497     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
498   }
499 
500   /* right of diagonal part */
501   if (b->i[row + 1] - b->i[row] - l) {
502     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
503     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
504     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
505   }
506   PetscFunctionReturn(PETSC_SUCCESS);
507 }
508 
509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
510 {
511   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
512   PetscScalar value = 0.0;
513   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
514   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
515   PetscBool   roworiented = aij->roworiented;
516 
517   /* Some Variables required in the macro */
518   Mat         A     = aij->A;
519   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
520   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
521   PetscBool   ignorezeroentries = a->ignorezeroentries;
522   Mat         B                 = aij->B;
523   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
524   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
525   MatScalar  *aa, *ba;
526   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
527   PetscInt    nonew;
528   MatScalar  *ap1, *ap2;
529 
530   PetscFunctionBegin;
531   PetscCall(MatSeqAIJGetArray(A, &aa));
532   PetscCall(MatSeqAIJGetArray(B, &ba));
533   for (i = 0; i < m; i++) {
534     if (im[i] < 0) continue;
535     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
536     if (im[i] >= rstart && im[i] < rend) {
537       row      = im[i] - rstart;
538       lastcol1 = -1;
539       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
540       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
541       rmax1    = aimax[row];
542       nrow1    = ailen[row];
543       low1     = 0;
544       high1    = nrow1;
545       lastcol2 = -1;
546       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
547       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
548       rmax2    = bimax[row];
549       nrow2    = bilen[row];
550       low2     = 0;
551       high2    = nrow2;
552 
553       for (j = 0; j < n; j++) {
554         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
555         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
556         if (in[j] >= cstart && in[j] < cend) {
557           col   = in[j] - cstart;
558           nonew = a->nonew;
559           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
560         } else if (in[j] < 0) {
561           continue;
562         } else {
563           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
564           if (mat->was_assembled) {
565             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
566 #if defined(PETSC_USE_CTABLE)
567             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
568             col--;
569 #else
570             col = aij->colmap[in[j]] - 1;
571 #endif
572             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
573               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
574               col = in[j];
575               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
576               B     = aij->B;
577               b     = (Mat_SeqAIJ *)B->data;
578               bimax = b->imax;
579               bi    = b->i;
580               bilen = b->ilen;
581               bj    = b->j;
582               ba    = b->a;
583               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
584               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
585               rmax2 = bimax[row];
586               nrow2 = bilen[row];
587               low2  = 0;
588               high2 = nrow2;
589               bm    = aij->B->rmap->n;
590               ba    = b->a;
591             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
592               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
593                 PetscCall(PetscInfo(mat, "Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
594               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
595             }
596           } else col = in[j];
597           nonew = b->nonew;
598           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
599         }
600       }
601     } else {
602       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
603       if (!aij->donotstash) {
604         mat->assembled = PETSC_FALSE;
605         if (roworiented) {
606           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
607         } else {
608           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
609         }
610       }
611     }
612   }
613   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
614   PetscCall(MatSeqAIJRestoreArray(B, &ba));
615   PetscFunctionReturn(PETSC_SUCCESS);
616 }
617 
618 /*
619     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
620     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
621     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
622 */
623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
624 {
625   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
626   Mat         A      = aij->A; /* diagonal part of the matrix */
627   Mat         B      = aij->B; /* off-diagonal part of the matrix */
628   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
629   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
630   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
631   PetscInt   *ailen = a->ilen, *aj = a->j;
632   PetscInt   *bilen = b->ilen, *bj = b->j;
633   PetscInt    am          = aij->A->rmap->n, j;
634   PetscInt    diag_so_far = 0, dnz;
635   PetscInt    offd_so_far = 0, onz;
636 
637   PetscFunctionBegin;
638   /* Iterate over all rows of the matrix */
639   for (j = 0; j < am; j++) {
640     dnz = onz = 0;
641     /*  Iterate over all non-zero columns of the current row */
642     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
643       /* If column is in the diagonal */
644       if (mat_j[col] >= cstart && mat_j[col] < cend) {
645         aj[diag_so_far++] = mat_j[col] - cstart;
646         dnz++;
647       } else { /* off-diagonal entries */
648         bj[offd_so_far++] = mat_j[col];
649         onz++;
650       }
651     }
652     ailen[j] = dnz;
653     bilen[j] = onz;
654   }
655   PetscFunctionReturn(PETSC_SUCCESS);
656 }
657 
658 /*
659     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
660     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
661     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
662     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
663     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
664 */
665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
666 {
667   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
668   Mat          A    = aij->A; /* diagonal part of the matrix */
669   Mat          B    = aij->B; /* off-diagonal part of the matrix */
670   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
671   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
672   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
673   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
674   PetscInt    *ailen = a->ilen, *aj = a->j;
675   PetscInt    *bilen = b->ilen, *bj = b->j;
676   PetscInt     am          = aij->A->rmap->n, j;
677   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
678   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
679   PetscScalar *aa = a->a, *ba = b->a;
680 
681   PetscFunctionBegin;
682   /* Iterate over all rows of the matrix */
683   for (j = 0; j < am; j++) {
684     dnz_row = onz_row = 0;
685     rowstart_offd     = full_offd_i[j];
686     rowstart_diag     = full_diag_i[j];
687     /*  Iterate over all non-zero columns of the current row */
688     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
689       /* If column is in the diagonal */
690       if (mat_j[col] >= cstart && mat_j[col] < cend) {
691         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
692         aa[rowstart_diag + dnz_row] = mat_a[col];
693         dnz_row++;
694       } else { /* off-diagonal entries */
695         bj[rowstart_offd + onz_row] = mat_j[col];
696         ba[rowstart_offd + onz_row] = mat_a[col];
697         onz_row++;
698       }
699     }
700     ailen[j] = dnz_row;
701     bilen[j] = onz_row;
702   }
703   PetscFunctionReturn(PETSC_SUCCESS);
704 }
705 
706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
707 {
708   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
709   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
710   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
711 
712   PetscFunctionBegin;
713   for (i = 0; i < m; i++) {
714     if (idxm[i] < 0) continue; /* negative row */
715     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
716     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
717     row = idxm[i] - rstart;
718     for (j = 0; j < n; j++) {
719       if (idxn[j] < 0) continue; /* negative column */
720       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
721       if (idxn[j] >= cstart && idxn[j] < cend) {
722         col = idxn[j] - cstart;
723         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
724       } else {
725         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
726 #if defined(PETSC_USE_CTABLE)
727         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
728         col--;
729 #else
730         col = aij->colmap[idxn[j]] - 1;
731 #endif
732         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
733         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
734       }
735     }
736   }
737   PetscFunctionReturn(PETSC_SUCCESS);
738 }
739 
740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
741 {
742   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
743   PetscInt    nstash, reallocs;
744 
745   PetscFunctionBegin;
746   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
747 
748   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
749   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
750   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
755 {
756   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
757   PetscMPIInt  n;
758   PetscInt     i, j, rstart, ncols, flg;
759   PetscInt    *row, *col;
760   PetscBool    other_disassembled;
761   PetscScalar *val;
762 
763   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
764 
765   PetscFunctionBegin;
766   if (!aij->donotstash && !mat->nooffprocentries) {
767     while (1) {
768       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
769       if (!flg) break;
770 
771       for (i = 0; i < n;) {
772         /* Now identify the consecutive vals belonging to the same row */
773         for (j = i, rstart = row[j]; j < n; j++) {
774           if (row[j] != rstart) break;
775         }
776         if (j < n) ncols = j - i;
777         else ncols = n - i;
778         /* Now assemble all these values with a single function call */
779         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
780         i = j;
781       }
782     }
783     PetscCall(MatStashScatterEnd_Private(&mat->stash));
784   }
785 #if defined(PETSC_HAVE_DEVICE)
786   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
787   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
788   if (mat->boundtocpu) {
789     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
790     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
791   }
792 #endif
793   PetscCall(MatAssemblyBegin(aij->A, mode));
794   PetscCall(MatAssemblyEnd(aij->A, mode));
795 
796   /* determine if any processor has disassembled; if so, we must
797      also disassemble ourselves, in order that we may reassemble. */
798   /*
799      if the nonzero structure of submatrix B cannot change then we know that
800      no processor disassembled, thus we can skip this step
801   */
802   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
803     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
804     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
805       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
806     }
807   }
808   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
809   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
810 #if defined(PETSC_HAVE_DEVICE)
811   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
812 #endif
813   PetscCall(MatAssemblyBegin(aij->B, mode));
814   PetscCall(MatAssemblyEnd(aij->B, mode));
815 
816   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
817 
818   aij->rowvalues = NULL;
819 
820   PetscCall(VecDestroy(&aij->diag));
821 
822   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
823   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
824     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
825     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
826   }
827 #if defined(PETSC_HAVE_DEVICE)
828   mat->offloadmask = PETSC_OFFLOAD_BOTH;
829 #endif
830   PetscFunctionReturn(PETSC_SUCCESS);
831 }
832 
833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
834 {
835   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
836 
837   PetscFunctionBegin;
838   PetscCall(MatZeroEntries(l->A));
839   PetscCall(MatZeroEntries(l->B));
840   PetscFunctionReturn(PETSC_SUCCESS);
841 }
842 
843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
844 {
845   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
846   PetscInt   *lrows;
847   PetscInt    r, len;
848   PetscBool   cong;
849 
850   PetscFunctionBegin;
851   /* get locally owned rows */
852   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
853   PetscCall(MatHasCongruentLayouts(A, &cong));
854   /* fix right-hand side if needed */
855   if (x && b) {
856     const PetscScalar *xx;
857     PetscScalar       *bb;
858 
859     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
860     PetscCall(VecGetArrayRead(x, &xx));
861     PetscCall(VecGetArray(b, &bb));
862     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
863     PetscCall(VecRestoreArrayRead(x, &xx));
864     PetscCall(VecRestoreArray(b, &bb));
865   }
866 
867   if (diag != 0.0 && cong) {
868     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
869     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
870   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
871     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
872     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
873     PetscInt    nnwA, nnwB;
874     PetscBool   nnzA, nnzB;
875 
876     nnwA = aijA->nonew;
877     nnwB = aijB->nonew;
878     nnzA = aijA->keepnonzeropattern;
879     nnzB = aijB->keepnonzeropattern;
880     if (!nnzA) {
881       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
882       aijA->nonew = 0;
883     }
884     if (!nnzB) {
885       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
886       aijB->nonew = 0;
887     }
888     /* Must zero here before the next loop */
889     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
890     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
891     for (r = 0; r < len; ++r) {
892       const PetscInt row = lrows[r] + A->rmap->rstart;
893       if (row >= A->cmap->N) continue;
894       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
895     }
896     aijA->nonew = nnwA;
897     aijB->nonew = nnwB;
898   } else {
899     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
900     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
901   }
902   PetscCall(PetscFree(lrows));
903   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
904   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
905 
906   /* only change matrix nonzero state if pattern was allowed to be changed */
907   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
908     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
909     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
910   }
911   PetscFunctionReturn(PETSC_SUCCESS);
912 }
913 
914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
915 {
916   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
917   PetscInt           n = A->rmap->n;
918   PetscInt           i, j, r, m, len = 0;
919   PetscInt          *lrows, *owners = A->rmap->range;
920   PetscMPIInt        p = 0;
921   PetscSFNode       *rrows;
922   PetscSF            sf;
923   const PetscScalar *xx;
924   PetscScalar       *bb, *mask, *aij_a;
925   Vec                xmask, lmask;
926   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
927   const PetscInt    *aj, *ii, *ridx;
928   PetscScalar       *aa;
929 
930   PetscFunctionBegin;
931   /* Create SF where leaves are input rows and roots are owned rows */
932   PetscCall(PetscMalloc1(n, &lrows));
933   for (r = 0; r < n; ++r) lrows[r] = -1;
934   PetscCall(PetscMalloc1(N, &rrows));
935   for (r = 0; r < N; ++r) {
936     const PetscInt idx = rows[r];
937     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
938     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
939       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
940     }
941     rrows[r].rank  = p;
942     rrows[r].index = rows[r] - owners[p];
943   }
944   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
945   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
946   /* Collect flags for rows to be zeroed */
947   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
948   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
949   PetscCall(PetscSFDestroy(&sf));
950   /* Compress and put in row numbers */
951   for (r = 0; r < n; ++r)
952     if (lrows[r] >= 0) lrows[len++] = r;
953   /* zero diagonal part of matrix */
954   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
955   /* handle off-diagonal part of matrix */
956   PetscCall(MatCreateVecs(A, &xmask, NULL));
957   PetscCall(VecDuplicate(l->lvec, &lmask));
958   PetscCall(VecGetArray(xmask, &bb));
959   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
960   PetscCall(VecRestoreArray(xmask, &bb));
961   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
962   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
963   PetscCall(VecDestroy(&xmask));
964   if (x && b) { /* this code is buggy when the row and column layout don't match */
965     PetscBool cong;
966 
967     PetscCall(MatHasCongruentLayouts(A, &cong));
968     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
969     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
970     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
971     PetscCall(VecGetArrayRead(l->lvec, &xx));
972     PetscCall(VecGetArray(b, &bb));
973   }
974   PetscCall(VecGetArray(lmask, &mask));
975   /* remove zeroed rows of off-diagonal matrix */
976   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
977   ii = aij->i;
978   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
979   /* loop over all elements of off process part of matrix zeroing removed columns*/
980   if (aij->compressedrow.use) {
981     m    = aij->compressedrow.nrows;
982     ii   = aij->compressedrow.i;
983     ridx = aij->compressedrow.rindex;
984     for (i = 0; i < m; i++) {
985       n  = ii[i + 1] - ii[i];
986       aj = aij->j + ii[i];
987       aa = aij_a + ii[i];
988 
989       for (j = 0; j < n; j++) {
990         if (PetscAbsScalar(mask[*aj])) {
991           if (b) bb[*ridx] -= *aa * xx[*aj];
992           *aa = 0.0;
993         }
994         aa++;
995         aj++;
996       }
997       ridx++;
998     }
999   } else { /* do not use compressed row format */
1000     m = l->B->rmap->n;
1001     for (i = 0; i < m; i++) {
1002       n  = ii[i + 1] - ii[i];
1003       aj = aij->j + ii[i];
1004       aa = aij_a + ii[i];
1005       for (j = 0; j < n; j++) {
1006         if (PetscAbsScalar(mask[*aj])) {
1007           if (b) bb[i] -= *aa * xx[*aj];
1008           *aa = 0.0;
1009         }
1010         aa++;
1011         aj++;
1012       }
1013     }
1014   }
1015   if (x && b) {
1016     PetscCall(VecRestoreArray(b, &bb));
1017     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1018   }
1019   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1020   PetscCall(VecRestoreArray(lmask, &mask));
1021   PetscCall(VecDestroy(&lmask));
1022   PetscCall(PetscFree(lrows));
1023 
1024   /* only change matrix nonzero state if pattern was allowed to be changed */
1025   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1026     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1027     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1028   }
1029   PetscFunctionReturn(PETSC_SUCCESS);
1030 }
1031 
1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1033 {
1034   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1035   PetscInt    nt;
1036   VecScatter  Mvctx = a->Mvctx;
1037 
1038   PetscFunctionBegin;
1039   PetscCall(VecGetLocalSize(xx, &nt));
1040   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1041   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1042   PetscUseTypeMethod(a->A, mult, xx, yy);
1043   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1044   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1045   PetscFunctionReturn(PETSC_SUCCESS);
1046 }
1047 
1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1049 {
1050   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1051 
1052   PetscFunctionBegin;
1053   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1054   PetscFunctionReturn(PETSC_SUCCESS);
1055 }
1056 
1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1058 {
1059   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1060   VecScatter  Mvctx = a->Mvctx;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   /* do nondiagonal part */
1076   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1077   /* do local part */
1078   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1079   /* add partial results together */
1080   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1081   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1082   PetscFunctionReturn(PETSC_SUCCESS);
1083 }
1084 
1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1086 {
1087   MPI_Comm    comm;
1088   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1089   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1090   IS          Me, Notme;
1091   PetscInt    M, N, first, last, *notme, i;
1092   PetscBool   lf;
1093   PetscMPIInt size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1098   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1099   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1100   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1101   PetscCallMPI(MPI_Comm_size(comm, &size));
1102   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1103 
1104   /* Hard test: off-diagonal block. This requires a call to MatCreateSubMatrices(). */
1105   PetscCall(MatGetSize(Amat, &M, &N));
1106   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1107   PetscCall(PetscMalloc1(N - last + first, &notme));
1108   for (i = 0; i < first; i++) notme[i] = i;
1109   for (i = last; i < M; i++) notme[i - last + first] = i;
1110   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1111   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1112   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1113   Aoff = Aoffs[0];
1114   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1115   Boff = Boffs[0];
1116   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1117   PetscCall(MatDestroyMatrices(1, &Aoffs));
1118   PetscCall(MatDestroyMatrices(1, &Boffs));
1119   PetscCall(ISDestroy(&Me));
1120   PetscCall(ISDestroy(&Notme));
1121   PetscCall(PetscFree(notme));
1122   PetscFunctionReturn(PETSC_SUCCESS);
1123 }
1124 
1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126 {
1127   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1128 
1129   PetscFunctionBegin;
1130   /* do nondiagonal part */
1131   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1132   /* do local part */
1133   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1134   /* add partial results together */
1135   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1136   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1137   PetscFunctionReturn(PETSC_SUCCESS);
1138 }
1139 
1140 /*
1141   This only works correctly for square matrices where the subblock A->A is the
1142    diagonal block
1143 */
1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1150   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1151   PetscCall(MatGetDiagonal(a->A, v));
1152   PetscFunctionReturn(PETSC_SUCCESS);
1153 }
1154 
1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1156 {
1157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1158 
1159   PetscFunctionBegin;
1160   PetscCall(MatScale(a->A, aa));
1161   PetscCall(MatScale(a->B, aa));
1162   PetscFunctionReturn(PETSC_SUCCESS);
1163 }
1164 
1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1166 {
1167   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1168   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1169   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1170   const PetscInt    *garray = aij->garray;
1171   const PetscScalar *aa, *ba;
1172   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1173   PetscInt64         nz, hnz;
1174   PetscInt          *rowlens;
1175   PetscInt          *colidxs;
1176   PetscScalar       *matvals;
1177   PetscMPIInt        rank;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(PetscViewerSetUp(viewer));
1181 
1182   M  = mat->rmap->N;
1183   N  = mat->cmap->N;
1184   m  = mat->rmap->n;
1185   rs = mat->rmap->rstart;
1186   cs = mat->cmap->rstart;
1187   nz = A->nz + B->nz;
1188 
1189   /* write matrix header */
1190   header[0] = MAT_FILE_CLASSID;
1191   header[1] = M;
1192   header[2] = N;
1193   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1194   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1195   if (rank == 0) {
1196     if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX;
1197     else header[3] = (PetscInt)hnz;
1198   }
1199   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1200 
1201   /* fill in and store row lengths  */
1202   PetscCall(PetscMalloc1(m, &rowlens));
1203   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1204   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1205   PetscCall(PetscFree(rowlens));
1206 
1207   /* fill in and store column indices */
1208   PetscCall(PetscMalloc1(nz, &colidxs));
1209   for (cnt = 0, i = 0; i < m; i++) {
1210     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1211       if (garray[B->j[jb]] > cs) break;
1212       colidxs[cnt++] = garray[B->j[jb]];
1213     }
1214     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1215     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1216   }
1217   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1218   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1219   PetscCall(PetscFree(colidxs));
1220 
1221   /* fill in and store nonzero values */
1222   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1223   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1224   PetscCall(PetscMalloc1(nz, &matvals));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       matvals[cnt++] = ba[jb];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1231     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1232   }
1233   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1234   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1235   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1236   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1237   PetscCall(PetscFree(matvals));
1238 
1239   /* write block size option to the viewer's .info file */
1240   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1241   PetscFunctionReturn(PETSC_SUCCESS);
1242 }
1243 
1244 #include <petscdraw.h>
1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1246 {
1247   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1248   PetscMPIInt       rank = aij->rank, size = aij->size;
1249   PetscBool         isdraw, iascii, isbinary;
1250   PetscViewer       sviewer;
1251   PetscViewerFormat format;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1255   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1256   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1257   if (iascii) {
1258     PetscCall(PetscViewerGetFormat(viewer, &format));
1259     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1260       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1261       PetscCall(PetscMalloc1(size, &nz));
1262       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1263       for (i = 0; i < (PetscInt)size; i++) {
1264         nmax = PetscMax(nmax, nz[i]);
1265         nmin = PetscMin(nmin, nz[i]);
1266         navg += nz[i];
1267       }
1268       PetscCall(PetscFree(nz));
1269       navg = navg / size;
1270       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1271       PetscFunctionReturn(PETSC_SUCCESS);
1272     }
1273     PetscCall(PetscViewerGetFormat(viewer, &format));
1274     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1275       MatInfo   info;
1276       PetscInt *inodes = NULL;
1277 
1278       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1279       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1280       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1281       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1282       if (!inodes) {
1283         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1284                                                      (double)info.memory));
1285       } else {
1286         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1287                                                      (double)info.memory));
1288       }
1289       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1290       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1291       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1292       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1293       PetscCall(PetscViewerFlush(viewer));
1294       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1295       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1296       PetscCall(VecScatterView(aij->Mvctx, viewer));
1297       PetscFunctionReturn(PETSC_SUCCESS);
1298     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1299       PetscInt inodecount, inodelimit, *inodes;
1300       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1301       if (inodes) {
1302         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1303       } else {
1304         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1305       }
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1308       PetscFunctionReturn(PETSC_SUCCESS);
1309     }
1310   } else if (isbinary) {
1311     if (size == 1) {
1312       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1313       PetscCall(MatView(aij->A, viewer));
1314     } else {
1315       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1316     }
1317     PetscFunctionReturn(PETSC_SUCCESS);
1318   } else if (iascii && size == 1) {
1319     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1320     PetscCall(MatView(aij->A, viewer));
1321     PetscFunctionReturn(PETSC_SUCCESS);
1322   } else if (isdraw) {
1323     PetscDraw draw;
1324     PetscBool isnull;
1325     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1326     PetscCall(PetscDrawIsNull(draw, &isnull));
1327     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1328   }
1329 
1330   { /* assemble the entire matrix onto first processor */
1331     Mat A = NULL, Av;
1332     IS  isrow, iscol;
1333 
1334     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1335     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1336     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1337     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1338     /*  The commented code uses MatCreateSubMatrices instead */
1339     /*
1340     Mat *AA, A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1344     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1345     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1346     if (rank == 0) {
1347        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1348        A    = AA[0];
1349        Av   = AA[0];
1350     }
1351     PetscCall(MatDestroySubMatrices(1,&AA));
1352 */
1353     PetscCall(ISDestroy(&iscol));
1354     PetscCall(ISDestroy(&isrow));
1355     /*
1356        Everyone has to call to draw the matrix since the graphics waits are
1357        synchronized across all processors that share the PetscDraw object
1358     */
1359     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1360     if (rank == 0) {
1361       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1362       PetscCall(MatView_SeqAIJ(Av, sviewer));
1363     }
1364     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1365     PetscCall(MatDestroy(&A));
1366   }
1367   PetscFunctionReturn(PETSC_SUCCESS);
1368 }
1369 
1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1371 {
1372   PetscBool iascii, isdraw, issocket, isbinary;
1373 
1374   PetscFunctionBegin;
1375   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1376   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1377   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1378   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1379   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1380   PetscFunctionReturn(PETSC_SUCCESS);
1381 }
1382 
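/*
   SOR for MPIAIJ is applied blockwise: each "local" sweep scatters the ghost values of xx into lvec,
   forms bb1 = bb - B*lvec (moving the off-diagonal coupling onto the right-hand side), and then runs
   the SOR kernel of the diagonal block A; the Eisenstat variant is handled separately below. A true
   global parallel SOR is not supported and raises an error.
*/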
1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1384 {
1385   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1386   Vec         bb1 = NULL;
1387   PetscBool   hasop;
1388 
1389   PetscFunctionBegin;
1390   if (flag == SOR_APPLY_UPPER) {
1391     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1392     PetscFunctionReturn(PETSC_SUCCESS);
1393   }
1394 
1395   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1396 
1397   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1398     if (flag & SOR_ZERO_INITIAL_GUESS) {
1399       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1400       its--;
1401     }
1402 
1403     while (its--) {
1404       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1405       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1406 
1407       /* update rhs: bb1 = bb - B*x */
1408       PetscCall(VecScale(mat->lvec, -1.0));
1409       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1410 
1411       /* local sweep */
1412       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1413     }
1414   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_EISENSTAT) {
1447     Vec xx1;
1448 
1449     PetscCall(VecDuplicate(bb, &xx1));
1450     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1451 
1452     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454     if (!mat->diag) {
1455       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1456       PetscCall(MatGetDiagonal(matin, mat->diag));
1457     }
1458     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1459     if (hasop) {
1460       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1461     } else {
1462       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1463     }
1464     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1465 
1466     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1467 
1468     /* local sweep */
1469     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1470     PetscCall(VecAXPY(xx, 1.0, xx1));
1471     PetscCall(VecDestroy(&xx1));
1472   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1473 
1474   PetscCall(VecDestroy(&bb1));
1475 
1476   matin->factorerrortype = mat->A->factorerrortype;
1477   PetscFunctionReturn(PETSC_SUCCESS);
1478 }
1479 
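/*
   The permutation is carried out with star forests (PetscSF): the row and column permutations are
   inverted by SF reductions so that each rank learns the new global index of every row and column it
   owns, the diagonal/off-diagonal nonzero counts of the permuted matrix are communicated for
   preallocation, and the old entries are finally inserted at their permuted locations with
   MatSetValues().
*/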
1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1481 {
1482   Mat             aA, aB, Aperm;
1483   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1484   PetscScalar    *aa, *ba;
1485   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1486   PetscSF         rowsf, sf;
1487   IS              parcolp = NULL;
1488   PetscBool       done;
1489 
1490   PetscFunctionBegin;
1491   PetscCall(MatGetLocalSize(A, &m, &n));
1492   PetscCall(ISGetIndices(rowp, &rwant));
1493   PetscCall(ISGetIndices(colp, &cwant));
1494   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1495 
1496   /* Invert row permutation to find out where my rows should go */
1497   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1498   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1499   PetscCall(PetscSFSetFromOptions(rowsf));
1500   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1501   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1502   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1503 
1504   /* Invert column permutation to find out where my columns should go */
1505   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1506   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1507   PetscCall(PetscSFSetFromOptions(sf));
1508   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1509   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1510   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1511   PetscCall(PetscSFDestroy(&sf));
1512 
1513   PetscCall(ISRestoreIndices(rowp, &rwant));
1514   PetscCall(ISRestoreIndices(colp, &cwant));
1515   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1516 
1517   /* Find out where my gcols should go */
1518   PetscCall(MatGetSize(aB, NULL, &ng));
1519   PetscCall(PetscMalloc1(ng, &gcdest));
1520   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1521   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1522   PetscCall(PetscSFSetFromOptions(sf));
1523   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1524   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1525   PetscCall(PetscSFDestroy(&sf));
1526 
1527   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1528   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1529   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1530   for (i = 0; i < m; i++) {
1531     PetscInt    row = rdest[i];
1532     PetscMPIInt rowner;
1533     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1534     for (j = ai[i]; j < ai[i + 1]; j++) {
1535       PetscInt    col = cdest[aj[j]];
1536       PetscMPIInt cowner;
1537       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1538       if (rowner == cowner) dnnz[i]++;
1539       else onnz[i]++;
1540     }
1541     for (j = bi[i]; j < bi[i + 1]; j++) {
1542       PetscInt    col = gcdest[bj[j]];
1543       PetscMPIInt cowner;
1544       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1545       if (rowner == cowner) dnnz[i]++;
1546       else onnz[i]++;
1547     }
1548   }
1549   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1550   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1551   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1552   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1553   PetscCall(PetscSFDestroy(&rowsf));
1554 
1555   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1556   PetscCall(MatSeqAIJGetArray(aA, &aa));
1557   PetscCall(MatSeqAIJGetArray(aB, &ba));
1558   for (i = 0; i < m; i++) {
1559     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1560     PetscInt  j0, rowlen;
1561     rowlen = ai[i + 1] - ai[i];
1562     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert the row in batches of at most m */
1563       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1564       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1565     }
1566     rowlen = bi[i + 1] - bi[i];
1567     for (j0 = j = 0; j < rowlen; j0 = j) {
1568       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1569       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1570     }
1571   }
1572   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1573   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1574   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1575   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1576   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1577   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1578   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1579   PetscCall(PetscFree3(work, rdest, cdest));
1580   PetscCall(PetscFree(gcdest));
1581   if (parcolp) PetscCall(ISDestroy(&colp));
1582   *B = Aperm;
1583   PetscFunctionReturn(PETSC_SUCCESS);
1584 }
1585 
1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1587 {
1588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1589 
1590   PetscFunctionBegin;
1591   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1592   if (ghosts) *ghosts = aij->garray;
1593   PetscFunctionReturn(PETSC_SUCCESS);
1594 }
1595 
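/*
   MatGetInfo for MPIAIJ sums the statistics of the diagonal (A) and off-diagonal (B) blocks; for the
   MAT_GLOBAL_MAX and MAT_GLOBAL_SUM variants the five accumulated fields are then reduced across the
   communicator with MPI_MAX or MPI_SUM respectively.
*/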
1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1597 {
1598   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1599   Mat            A = mat->A, B = mat->B;
1600   PetscLogDouble isend[5], irecv[5];
1601 
1602   PetscFunctionBegin;
1603   info->block_size = 1.0;
1604   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1605 
1606   isend[0] = info->nz_used;
1607   isend[1] = info->nz_allocated;
1608   isend[2] = info->nz_unneeded;
1609   isend[3] = info->memory;
1610   isend[4] = info->mallocs;
1611 
1612   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1613 
1614   isend[0] += info->nz_used;
1615   isend[1] += info->nz_allocated;
1616   isend[2] += info->nz_unneeded;
1617   isend[3] += info->memory;
1618   isend[4] += info->mallocs;
1619   if (flag == MAT_LOCAL) {
1620     info->nz_used      = isend[0];
1621     info->nz_allocated = isend[1];
1622     info->nz_unneeded  = isend[2];
1623     info->memory       = isend[3];
1624     info->mallocs      = isend[4];
1625   } else if (flag == MAT_GLOBAL_MAX) {
1626     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1627 
1628     info->nz_used      = irecv[0];
1629     info->nz_allocated = irecv[1];
1630     info->nz_unneeded  = irecv[2];
1631     info->memory       = irecv[3];
1632     info->mallocs      = irecv[4];
1633   } else if (flag == MAT_GLOBAL_SUM) {
1634     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1635 
1636     info->nz_used      = irecv[0];
1637     info->nz_allocated = irecv[1];
1638     info->nz_unneeded  = irecv[2];
1639     info->memory       = irecv[3];
1640     info->mallocs      = irecv[4];
1641   }
1642   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1643   info->fill_ratio_needed = 0;
1644   info->factor_mallocs    = 0;
1645   PetscFunctionReturn(PETSC_SUCCESS);
1646 }
1647 
1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1649 {
1650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1651 
1652   PetscFunctionBegin;
1653   switch (op) {
1654   case MAT_NEW_NONZERO_LOCATIONS:
1655   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1656   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1657   case MAT_KEEP_NONZERO_PATTERN:
1658   case MAT_NEW_NONZERO_LOCATION_ERR:
1659   case MAT_USE_INODES:
1660   case MAT_IGNORE_ZERO_ENTRIES:
1661   case MAT_FORM_EXPLICIT_TRANSPOSE:
1662     MatCheckPreallocated(A, 1);
1663     PetscCall(MatSetOption(a->A, op, flg));
1664     PetscCall(MatSetOption(a->B, op, flg));
1665     break;
1666   case MAT_ROW_ORIENTED:
1667     MatCheckPreallocated(A, 1);
1668     a->roworiented = flg;
1669 
1670     PetscCall(MatSetOption(a->A, op, flg));
1671     PetscCall(MatSetOption(a->B, op, flg));
1672     break;
1673   case MAT_FORCE_DIAGONAL_ENTRIES:
1674   case MAT_SORTED_FULL:
1675     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1676     break;
1677   case MAT_IGNORE_OFF_PROC_ENTRIES:
1678     a->donotstash = flg;
1679     break;
1680   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1681   case MAT_SPD:
1682   case MAT_SYMMETRIC:
1683   case MAT_STRUCTURALLY_SYMMETRIC:
1684   case MAT_HERMITIAN:
1685   case MAT_SYMMETRY_ETERNAL:
1686   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1687   case MAT_SPD_ETERNAL:
1688     /* if the diagonal block is square it inherits some of the symmetry properties set above */
1689     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1690     break;
1691   case MAT_SUBMAT_SINGLEIS:
1692     A->submat_singleis = flg;
1693     break;
1694   case MAT_STRUCTURE_ONLY:
1695     /* The option is handled directly by MatSetOption() */
1696     break;
1697   default:
1698     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1699   }
1700   PetscFunctionReturn(PETSC_SUCCESS);
1701 }
1702 
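/*
   Returns one global row by merging the corresponding rows of the diagonal (A) and off-diagonal (B)
   blocks: B's compressed column numbers are mapped to global indices through garray and interleaved
   with A's columns (shifted by the column start) so the returned row is sorted by global column. The
   work arrays, sized for the longest local row, are allocated lazily on first use.
*/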
1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1704 {
1705   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1706   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1707   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1708   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1709   PetscInt    *cmap, *idx_p;
1710 
1711   PetscFunctionBegin;
1712   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1713   mat->getrowactive = PETSC_TRUE;
1714 
1715   if (!mat->rowvalues && (idx || v)) {
1716     /*
1717         allocate enough space to hold information from the longest row.
1718     */
1719     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1720     PetscInt    max = 1, tmp;
1721     for (i = 0; i < matin->rmap->n; i++) {
1722       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1723       if (max < tmp) max = tmp;
1724     }
1725     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1726   }
1727 
1728   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1729   lrow = row - rstart;
1730 
1731   pvA = &vworkA;
1732   pcA = &cworkA;
1733   pvB = &vworkB;
1734   pcB = &cworkB;
1735   if (!v) {
1736     pvA = NULL;
1737     pvB = NULL;
1738   }
1739   if (!idx) {
1740     pcA = NULL;
1741     if (!v) pcB = NULL;
1742   }
1743   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1744   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1745   nztot = nzA + nzB;
1746 
1747   cmap = mat->garray;
1748   if (v || idx) {
1749     if (nztot) {
1750       /* Sort by increasing column numbers, assuming A and B already sorted */
1751       PetscInt imark = -1;
1752       if (v) {
1753         *v = v_p = mat->rowvalues;
1754         for (i = 0; i < nzB; i++) {
1755           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1756           else break;
1757         }
1758         imark = i;
1759         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1760         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1761       }
1762       if (idx) {
1763         *idx = idx_p = mat->rowindices;
1764         if (imark > -1) {
1765           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1766         } else {
1767           for (i = 0; i < nzB; i++) {
1768             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1769             else break;
1770           }
1771           imark = i;
1772         }
1773         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1774         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1775       }
1776     } else {
1777       if (idx) *idx = NULL;
1778       if (v) *v = NULL;
1779     }
1780   }
1781   *nz = nztot;
1782   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1783   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1784   PetscFunctionReturn(PETSC_SUCCESS);
1785 }
1786 
1787 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1788 {
1789   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1790 
1791   PetscFunctionBegin;
1792   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1793   aij->getrowactive = PETSC_FALSE;
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1798 {
1799   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1800   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1801   PetscInt         i, j, cstart = mat->cmap->rstart;
1802   PetscReal        sum = 0.0;
1803   const MatScalar *v, *amata, *bmata;
1804   PetscMPIInt      iN;
1805 
1806   PetscFunctionBegin;
1807   if (aij->size == 1) {
1808     PetscCall(MatNorm(aij->A, type, norm));
1809   } else {
1810     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1811     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1812     if (type == NORM_FROBENIUS) {
1813       v = amata;
1814       for (i = 0; i < amat->nz; i++) {
1815         sum += PetscRealPart(PetscConj(*v) * (*v));
1816         v++;
1817       }
1818       v = bmata;
1819       for (i = 0; i < bmat->nz; i++) {
1820         sum += PetscRealPart(PetscConj(*v) * (*v));
1821         v++;
1822       }
1823       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1824       *norm = PetscSqrtReal(*norm);
1825       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1826     } else if (type == NORM_1) { /* max column norm */
1827       PetscReal *tmp, *tmp2;
1828       PetscInt  *jj, *garray = aij->garray;
1829       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1830       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1831       *norm = 0.0;
1832       v     = amata;
1833       jj    = amat->j;
1834       for (j = 0; j < amat->nz; j++) {
1835         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1836         v++;
1837       }
1838       v  = bmata;
1839       jj = bmat->j;
1840       for (j = 0; j < bmat->nz; j++) {
1841         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1842         v++;
1843       }
1844       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1845       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1846       for (j = 0; j < mat->cmap->N; j++) {
1847         if (tmp2[j] > *norm) *norm = tmp2[j];
1848       }
1849       PetscCall(PetscFree(tmp));
1850       PetscCall(PetscFree(tmp2));
1851       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1852     } else if (type == NORM_INFINITY) { /* max row norm */
1853       PetscReal ntemp = 0.0;
1854       for (j = 0; j < aij->A->rmap->n; j++) {
1855         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1856         sum = 0.0;
1857         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v);
1859           v++;
1860         }
1861         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1862         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1863           sum += PetscAbsScalar(*v);
1864           v++;
1865         }
1866         if (sum > ntemp) ntemp = sum;
1867       }
1868       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1869       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1870     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1871     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1872     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1873   }
1874   PetscFunctionReturn(PETSC_SUCCESS);
1875 }
1876 
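/*
   The transpose is assembled in two pieces: the diagonal block is transposed locally (without
   MatSetValues) into the diagonal block of the result, while the off-diagonal entries are translated
   to global column numbers through garray and sent to their new owning ranks with MatSetValues().
   Preallocation of the result is computed up front, using a PetscSF reduction to accumulate the
   off-diagonal column counts.
*/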
1877 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1878 {
1879   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1880   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1881   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1882   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1883   Mat              B, A_diag, *B_diag;
1884   const MatScalar *pbv, *bv;
1885 
1886   PetscFunctionBegin;
1887   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1888   ma = A->rmap->n;
1889   na = A->cmap->n;
1890   mb = a->B->rmap->n;
1891   nb = a->B->cmap->n;
1892   ai = Aloc->i;
1893   aj = Aloc->j;
1894   bi = Bloc->i;
1895   bj = Bloc->j;
1896   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1897     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1898     PetscSFNode         *oloc;
1899     PETSC_UNUSED PetscSF sf;
1900 
1901     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1902     /* compute d_nnz for preallocation */
1903     PetscCall(PetscArrayzero(d_nnz, na));
1904     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1905     /* compute local off-diagonal contributions */
1906     PetscCall(PetscArrayzero(g_nnz, nb));
1907     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1908     /* map those to global */
1909     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1910     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1911     PetscCall(PetscSFSetFromOptions(sf));
1912     PetscCall(PetscArrayzero(o_nnz, na));
1913     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1914     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1915     PetscCall(PetscSFDestroy(&sf));
1916 
1917     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1918     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1919     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1920     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1921     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1922     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1923   } else {
1924     B = *matout;
1925     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1926   }
1927 
1928   b           = (Mat_MPIAIJ *)B->data;
1929   A_diag      = a->A;
1930   B_diag      = &b->A;
1931   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1932   A_diag_ncol = A_diag->cmap->N;
1933   B_diag_ilen = sub_B_diag->ilen;
1934   B_diag_i    = sub_B_diag->i;
1935 
1936   /* Set ilen for diagonal of B */
1937   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1938 
1939   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1940   very quickly (=without using MatSetValues), because all writes are local. */
1941   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1942   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1943 
1944   /* copy over the B part */
1945   PetscCall(PetscMalloc1(bi[mb], &cols));
1946   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1947   pbv = bv;
1948   row = A->rmap->rstart;
1949   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1950   cols_tmp = cols;
1951   for (i = 0; i < mb; i++) {
1952     ncol = bi[i + 1] - bi[i];
1953     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1954     row++;
1955     if (pbv) pbv += ncol;
1956     if (cols_tmp) cols_tmp += ncol;
1957   }
1958   PetscCall(PetscFree(cols));
1959   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1960 
1961   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1962   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1963   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1964     *matout = B;
1965   } else {
1966     PetscCall(MatHeaderMerge(A, &B));
1967   }
1968   PetscFunctionReturn(PETSC_SUCCESS);
1969 }
1970 
1971 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1972 {
1973   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1974   Mat         a = aij->A, b = aij->B;
1975   PetscInt    s1, s2, s3;
1976 
1977   PetscFunctionBegin;
1978   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1979   if (rr) {
1980     PetscCall(VecGetLocalSize(rr, &s1));
1981     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1982     /* Overlap communication with computation. */
1983     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1984   }
1985   if (ll) {
1986     PetscCall(VecGetLocalSize(ll, &s1));
1987     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1988     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1989   }
1990   /* scale the diagonal block */
1991   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1992 
1993   if (rr) {
1994     /* Do a scatter end and then right scale the off-diagonal block */
1995     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1996     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
1997   }
1998   PetscFunctionReturn(PETSC_SUCCESS);
1999 }
2000 
2001 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2002 {
2003   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2004 
2005   PetscFunctionBegin;
2006   PetscCall(MatSetUnfactored(a->A));
2007   PetscFunctionReturn(PETSC_SUCCESS);
2008 }
2009 
2010 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2011 {
2012   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2013   Mat         a, b, c, d;
2014   PetscBool   flg;
2015 
2016   PetscFunctionBegin;
2017   a = matA->A;
2018   b = matA->B;
2019   c = matB->A;
2020   d = matB->B;
2021 
2022   PetscCall(MatEqual(a, c, &flg));
2023   if (flg) PetscCall(MatEqual(b, d, &flg));
2024   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
2028 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2029 {
2030   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2031   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2032 
2033   PetscFunctionBegin;
2034   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2035   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2036     /* because of the column compression in the off-processor part of the matrix a->B,
2037        the number of columns in a->B and b->B may be different, hence we cannot call
2038        the MatCopy() directly on the two parts. If need be, we can provide a more
2039        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2040        then copying the submatrices */
2041     PetscCall(MatCopy_Basic(A, B, str));
2042   } else {
2043     PetscCall(MatCopy(a->A, b->A, str));
2044     PetscCall(MatCopy(a->B, b->B, str));
2045   }
2046   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2047   PetscFunctionReturn(PETSC_SUCCESS);
2048 }
2049 
2050 /*
2051    Computes the number of nonzeros per row needed for preallocation when X and Y
2052    have different nonzero structure.
2053 */
2054 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2055 {
2056   PetscInt i, j, k, nzx, nzy;
2057 
2058   PetscFunctionBegin;
2059   /* Set the number of nonzeros in the new matrix */
2060   for (i = 0; i < m; i++) {
2061     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2062     nzx    = xi[i + 1] - xi[i];
2063     nzy    = yi[i + 1] - yi[i];
2064     nnz[i] = 0;
2065     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2066       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2067       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2068       nnz[i]++;
2069     }
2070     for (; k < nzy; k++) nnz[i]++;
2071   }
2072   PetscFunctionReturn(PETSC_SUCCESS);
2073 }
2074 
2075 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2076 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2077 {
2078   PetscInt    m = Y->rmap->N;
2079   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2080   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2081 
2082   PetscFunctionBegin;
2083   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2084   PetscFunctionReturn(PETSC_SUCCESS);
2085 }
2086 
2087 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2088 {
2089   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2090 
2091   PetscFunctionBegin;
2092   if (str == SAME_NONZERO_PATTERN) {
2093     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2094     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2095   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2096     PetscCall(MatAXPY_Basic(Y, a, X, str));
2097   } else {
2098     Mat       B;
2099     PetscInt *nnz_d, *nnz_o;
2100 
2101     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2102     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2103     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2104     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2105     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2106     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2107     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2108     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2109     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2110     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2111     PetscCall(MatHeaderMerge(Y, &B));
2112     PetscCall(PetscFree(nnz_d));
2113     PetscCall(PetscFree(nnz_o));
2114   }
2115   PetscFunctionReturn(PETSC_SUCCESS);
2116 }
2117 
2118 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2119 
2120 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2121 {
2122   PetscFunctionBegin;
2123   if (PetscDefined(USE_COMPLEX)) {
2124     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2125 
2126     PetscCall(MatConjugate_SeqAIJ(aij->A));
2127     PetscCall(MatConjugate_SeqAIJ(aij->B));
2128   }
2129   PetscFunctionReturn(PETSC_SUCCESS);
2130 }
2131 
2132 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2133 {
2134   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2135 
2136   PetscFunctionBegin;
2137   PetscCall(MatRealPart(a->A));
2138   PetscCall(MatRealPart(a->B));
2139   PetscFunctionReturn(PETSC_SUCCESS);
2140 }
2141 
2142 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2143 {
2144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2145 
2146   PetscFunctionBegin;
2147   PetscCall(MatImaginaryPart(a->A));
2148   PetscCall(MatImaginaryPart(a->B));
2149   PetscFunctionReturn(PETSC_SUCCESS);
2150 }
2151 
2152 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2153 {
2154   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2155   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2156   PetscScalar       *vv;
2157   Vec                vB, vA;
2158   const PetscScalar *va, *vb;
2159 
2160   PetscFunctionBegin;
2161   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2162   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2163 
2164   PetscCall(VecGetArrayRead(vA, &va));
2165   if (idx) {
2166     for (i = 0; i < m; i++) {
2167       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2168     }
2169   }
2170 
2171   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2172   PetscCall(PetscMalloc1(m, &idxb));
2173   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2174 
2175   PetscCall(VecGetArrayWrite(v, &vv));
2176   PetscCall(VecGetArrayRead(vB, &vb));
2177   for (i = 0; i < m; i++) {
2178     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2179       vv[i] = vb[i];
2180       if (idx) idx[i] = a->garray[idxb[i]];
2181     } else {
2182       vv[i] = va[i];
2183       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2184     }
2185   }
2186   PetscCall(VecRestoreArrayWrite(v, &vv));
2187   PetscCall(VecRestoreArrayRead(vA, &va));
2188   PetscCall(VecRestoreArrayRead(vB, &vb));
2189   PetscCall(PetscFree(idxb));
2190   PetscCall(VecDestroy(&vA));
2191   PetscCall(VecDestroy(&vB));
2192   PetscFunctionReturn(PETSC_SUCCESS);
2193 }
2194 
2195 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2196 {
2197   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2198   Vec         vB, vA;
2199 
2200   PetscFunctionBegin;
2201   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2202   PetscCall(MatGetRowSumAbs(a->A, vA));
2203   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2204   PetscCall(MatGetRowSumAbs(a->B, vB));
2205   PetscCall(VecAXPY(vA, 1.0, vB));
2206   PetscCall(VecDestroy(&vB));
2207   PetscCall(VecCopy(vA, v));
2208   PetscCall(VecDestroy(&vA));
2209   PetscFunctionReturn(PETSC_SUCCESS);
2210 }
2211 
2212 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2213 {
2214   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2215   PetscInt           m = A->rmap->n, n = A->cmap->n;
2216   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2217   PetscInt          *cmap = mat->garray;
2218   PetscInt          *diagIdx, *offdiagIdx;
2219   Vec                diagV, offdiagV;
2220   PetscScalar       *a, *diagA, *offdiagA;
2221   const PetscScalar *ba, *bav;
2222   PetscInt           r, j, col, ncols, *bi, *bj;
2223   Mat                B = mat->B;
2224   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2225 
2226   PetscFunctionBegin;
2227   /* When one process holds the entire matrix and the other processes have no entries */
2228   if (A->cmap->N == n) {
2229     PetscCall(VecGetArrayWrite(v, &diagA));
2230     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2231     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2232     PetscCall(VecDestroy(&diagV));
2233     PetscCall(VecRestoreArrayWrite(v, &diagA));
2234     PetscFunctionReturn(PETSC_SUCCESS);
2235   } else if (n == 0) {
2236     if (m) {
2237       PetscCall(VecGetArrayWrite(v, &a));
2238       for (r = 0; r < m; r++) {
2239         a[r] = 0.0;
2240         if (idx) idx[r] = -1;
2241       }
2242       PetscCall(VecRestoreArrayWrite(v, &a));
2243     }
2244     PetscFunctionReturn(PETSC_SUCCESS);
2245   }
2246 
2247   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2248   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2249   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2250   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2251 
2252   /* Get offdiagIdx[] for implicit 0.0 */
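  /* B stores only its structurally nonzero columns (compressed through cmap/garray), so every global
     column absent from a row is an implicit 0.0. The search below records the global column of the
     first such implicit zero so that a meaningful index can be returned when 0.0 wins; the same scheme
     is repeated in MatGetRowMin_MPIAIJ() and MatGetRowMax_MPIAIJ() below. */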
2253   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2254   ba = bav;
2255   bi = b->i;
2256   bj = b->j;
2257   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2258   for (r = 0; r < m; r++) {
2259     ncols = bi[r + 1] - bi[r];
2260     if (ncols == A->cmap->N - n) { /* Brow is dense */
2261       offdiagA[r]   = *ba;
2262       offdiagIdx[r] = cmap[0];
2263     } else { /* Brow is sparse, so there is an implicit 0.0 and the minimum magnitude is already known to be 0.0 */
2264       offdiagA[r] = 0.0;
2265 
2266       /* Find first hole in the cmap */
2267       for (j = 0; j < ncols; j++) {
2268         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2269         if (col > j && j < cstart) {
2270           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2271           break;
2272         } else if (col > j + n && j >= cstart) {
2273           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2274           break;
2275         }
2276       }
2277       if (j == ncols && ncols < A->cmap->N - n) {
2278         /* a hole is outside compressed Bcols */
2279         if (ncols == 0) {
2280           if (cstart) {
2281             offdiagIdx[r] = 0;
2282           } else offdiagIdx[r] = cend;
2283         } else { /* ncols > 0 */
2284           offdiagIdx[r] = cmap[ncols - 1] + 1;
2285           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2286         }
2287       }
2288     }
2289 
2290     for (j = 0; j < ncols; j++) {
2291       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2292         offdiagA[r]   = *ba;
2293         offdiagIdx[r] = cmap[*bj];
2294       }
2295       ba++;
2296       bj++;
2297     }
2298   }
2299 
2300   PetscCall(VecGetArrayWrite(v, &a));
2301   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2302   for (r = 0; r < m; ++r) {
2303     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2304       a[r] = diagA[r];
2305       if (idx) idx[r] = cstart + diagIdx[r];
2306     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2307       a[r] = diagA[r];
2308       if (idx) {
2309         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2310           idx[r] = cstart + diagIdx[r];
2311         } else idx[r] = offdiagIdx[r];
2312       }
2313     } else {
2314       a[r] = offdiagA[r];
2315       if (idx) idx[r] = offdiagIdx[r];
2316     }
2317   }
2318   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2319   PetscCall(VecRestoreArrayWrite(v, &a));
2320   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2321   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2322   PetscCall(VecDestroy(&diagV));
2323   PetscCall(VecDestroy(&offdiagV));
2324   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2325   PetscFunctionReturn(PETSC_SUCCESS);
2326 }
2327 
2328 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2329 {
2330   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2331   PetscInt           m = A->rmap->n, n = A->cmap->n;
2332   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2333   PetscInt          *cmap = mat->garray;
2334   PetscInt          *diagIdx, *offdiagIdx;
2335   Vec                diagV, offdiagV;
2336   PetscScalar       *a, *diagA, *offdiagA;
2337   const PetscScalar *ba, *bav;
2338   PetscInt           r, j, col, ncols, *bi, *bj;
2339   Mat                B = mat->B;
2340   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2341 
2342   PetscFunctionBegin;
2343   /* When one process holds the entire matrix and the other processes have no entries */
2344   if (A->cmap->N == n) {
2345     PetscCall(VecGetArrayWrite(v, &diagA));
2346     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2347     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2348     PetscCall(VecDestroy(&diagV));
2349     PetscCall(VecRestoreArrayWrite(v, &diagA));
2350     PetscFunctionReturn(PETSC_SUCCESS);
2351   } else if (n == 0) {
2352     if (m) {
2353       PetscCall(VecGetArrayWrite(v, &a));
2354       for (r = 0; r < m; r++) {
2355         a[r] = PETSC_MAX_REAL;
2356         if (idx) idx[r] = -1;
2357       }
2358       PetscCall(VecRestoreArrayWrite(v, &a));
2359     }
2360     PetscFunctionReturn(PETSC_SUCCESS);
2361   }
2362 
2363   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2364   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2365   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2366   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2367 
2368   /* Get offdiagIdx[] for implicit 0.0 */
2369   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2370   ba = bav;
2371   bi = b->i;
2372   bj = b->j;
2373   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2374   for (r = 0; r < m; r++) {
2375     ncols = bi[r + 1] - bi[r];
2376     if (ncols == A->cmap->N - n) { /* Brow is dense */
2377       offdiagA[r]   = *ba;
2378       offdiagIdx[r] = cmap[0];
2379     } else { /* Brow is sparse, so there is an implicit 0.0 and the minimum is already known to be 0.0 or lower */
2380       offdiagA[r] = 0.0;
2381 
2382       /* Find first hole in the cmap */
2383       for (j = 0; j < ncols; j++) {
2384         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2385         if (col > j && j < cstart) {
2386           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2387           break;
2388         } else if (col > j + n && j >= cstart) {
2389           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2390           break;
2391         }
2392       }
2393       if (j == ncols && ncols < A->cmap->N - n) {
2394         /* a hole is outside compressed Bcols */
2395         if (ncols == 0) {
2396           if (cstart) {
2397             offdiagIdx[r] = 0;
2398           } else offdiagIdx[r] = cend;
2399         } else { /* ncols > 0 */
2400           offdiagIdx[r] = cmap[ncols - 1] + 1;
2401           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2402         }
2403       }
2404     }
2405 
2406     for (j = 0; j < ncols; j++) {
2407       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2408         offdiagA[r]   = *ba;
2409         offdiagIdx[r] = cmap[*bj];
2410       }
2411       ba++;
2412       bj++;
2413     }
2414   }
2415 
2416   PetscCall(VecGetArrayWrite(v, &a));
2417   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2418   for (r = 0; r < m; ++r) {
2419     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2420       a[r] = diagA[r];
2421       if (idx) idx[r] = cstart + diagIdx[r];
2422     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2423       a[r] = diagA[r];
2424       if (idx) {
2425         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2426           idx[r] = cstart + diagIdx[r];
2427         } else idx[r] = offdiagIdx[r];
2428       }
2429     } else {
2430       a[r] = offdiagA[r];
2431       if (idx) idx[r] = offdiagIdx[r];
2432     }
2433   }
2434   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2435   PetscCall(VecRestoreArrayWrite(v, &a));
2436   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2437   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2438   PetscCall(VecDestroy(&diagV));
2439   PetscCall(VecDestroy(&offdiagV));
2440   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2441   PetscFunctionReturn(PETSC_SUCCESS);
2442 }
2443 
2444 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2445 {
2446   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2447   PetscInt           m = A->rmap->n, n = A->cmap->n;
2448   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2449   PetscInt          *cmap = mat->garray;
2450   PetscInt          *diagIdx, *offdiagIdx;
2451   Vec                diagV, offdiagV;
2452   PetscScalar       *a, *diagA, *offdiagA;
2453   const PetscScalar *ba, *bav;
2454   PetscInt           r, j, col, ncols, *bi, *bj;
2455   Mat                B = mat->B;
2456   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2457 
2458   PetscFunctionBegin;
2459   /* When one process holds the entire matrix and the other processes have no entries */
2460   if (A->cmap->N == n) {
2461     PetscCall(VecGetArrayWrite(v, &diagA));
2462     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2463     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2464     PetscCall(VecDestroy(&diagV));
2465     PetscCall(VecRestoreArrayWrite(v, &diagA));
2466     PetscFunctionReturn(PETSC_SUCCESS);
2467   } else if (n == 0) {
2468     if (m) {
2469       PetscCall(VecGetArrayWrite(v, &a));
2470       for (r = 0; r < m; r++) {
2471         a[r] = PETSC_MIN_REAL;
2472         if (idx) idx[r] = -1;
2473       }
2474       PetscCall(VecRestoreArrayWrite(v, &a));
2475     }
2476     PetscFunctionReturn(PETSC_SUCCESS);
2477   }
2478 
2479   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2480   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2481   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2482   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2483 
2484   /* Get offdiagIdx[] for implicit 0.0 */
2485   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2486   ba = bav;
2487   bi = b->i;
2488   bj = b->j;
2489   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2490   for (r = 0; r < m; r++) {
2491     ncols = bi[r + 1] - bi[r];
2492     if (ncols == A->cmap->N - n) { /* Brow is dense */
2493       offdiagA[r]   = *ba;
2494       offdiagIdx[r] = cmap[0];
2495     } else { /* Brow is sparse, so there is an implicit 0.0 and the maximum is already known to be 0.0 or higher */
2496       offdiagA[r] = 0.0;
2497 
2498       /* Find first hole in the cmap */
2499       for (j = 0; j < ncols; j++) {
2500         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2501         if (col > j && j < cstart) {
2502           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2503           break;
2504         } else if (col > j + n && j >= cstart) {
2505           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2506           break;
2507         }
2508       }
2509       if (j == ncols && ncols < A->cmap->N - n) {
2510         /* a hole is outside compressed Bcols */
2511         if (ncols == 0) {
2512           if (cstart) {
2513             offdiagIdx[r] = 0;
2514           } else offdiagIdx[r] = cend;
2515         } else { /* ncols > 0 */
2516           offdiagIdx[r] = cmap[ncols - 1] + 1;
2517           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2518         }
2519       }
2520     }
2521 
2522     for (j = 0; j < ncols; j++) {
2523       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2524         offdiagA[r]   = *ba;
2525         offdiagIdx[r] = cmap[*bj];
2526       }
2527       ba++;
2528       bj++;
2529     }
2530   }
2531 
2532   PetscCall(VecGetArrayWrite(v, &a));
2533   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2534   for (r = 0; r < m; ++r) {
2535     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2536       a[r] = diagA[r];
2537       if (idx) idx[r] = cstart + diagIdx[r];
2538     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2539       a[r] = diagA[r];
2540       if (idx) {
2541         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2542           idx[r] = cstart + diagIdx[r];
2543         } else idx[r] = offdiagIdx[r];
2544       }
2545     } else {
2546       a[r] = offdiagA[r];
2547       if (idx) idx[r] = offdiagIdx[r];
2548     }
2549   }
2550   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2551   PetscCall(VecRestoreArrayWrite(v, &a));
2552   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2553   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2554   PetscCall(VecDestroy(&diagV));
2555   PetscCall(VecDestroy(&offdiagV));
2556   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2557   PetscFunctionReturn(PETSC_SUCCESS);
2558 }
2559 
2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2561 {
2562   Mat *dummy;
2563 
2564   PetscFunctionBegin;
2565   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2566   *newmat = *dummy;
2567   PetscCall(PetscFree(dummy));
2568   PetscFunctionReturn(PETSC_SUCCESS);
2569 }
2570 
2571 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2572 {
2573   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2574 
2575   PetscFunctionBegin;
2576   PetscCall(MatInvertBlockDiagonal(a->A, values));
2577   A->factorerrortype = a->A->factorerrortype;
2578   PetscFunctionReturn(PETSC_SUCCESS);
2579 }
2580 
2581 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2582 {
2583   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2584 
2585   PetscFunctionBegin;
2586   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2587   PetscCall(MatSetRandom(aij->A, rctx));
2588   if (x->assembled) {
2589     PetscCall(MatSetRandom(aij->B, rctx));
2590   } else {
2591     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2592   }
2593   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2594   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2595   PetscFunctionReturn(PETSC_SUCCESS);
2596 }
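
/*
   Usage sketch for the random fill above (hedged; A is assumed to be a preallocated or assembled MATMPIAIJ):

     PetscRandom rctx;

     PetscCall(PetscRandomCreate(PetscObjectComm((PetscObject)A), &rctx));
     PetscCall(MatSetRandom(A, rctx)); // fills the preallocated/assembled nonzero locations with random values
     PetscCall(PetscRandomDestroy(&rctx));
*/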
2597 
2598 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2599 {
2600   PetscFunctionBegin;
2601   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2602   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2603   PetscFunctionReturn(PETSC_SUCCESS);
2604 }
2605 
2606 /*@
2607   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2608 
2609   Not Collective
2610 
2611   Input Parameter:
2612 . A - the matrix
2613 
2614   Output Parameter:
2615 . nz - the number of nonzeros
2616 
2617   Level: advanced
2618 
2619 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2620 @*/
2621 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2622 {
2623   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2624   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2625   PetscBool   isaij;
2626 
2627   PetscFunctionBegin;
2628   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2629   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2630   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2631   PetscFunctionReturn(PETSC_SUCCESS);
2632 }
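
/*
   Minimal usage sketch (hedged; A is assumed to be an assembled MATMPIAIJ):

     PetscCount nz;

     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz)); // nonzeros stored on this rank: diagonal block plus off-diagonal block
     PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros %" PetscInt64_FMT "\n", (PetscInt64)nz));
*/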
2633 
2634 /*@
2635   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2636 
2637   Collective
2638 
2639   Input Parameters:
2640 + A  - the matrix
2641 - sc - `PETSC_TRUE` indicates that the scalable algorithm should be used (by default the scalable algorithm is not used)
2642 
2643   Level: advanced
2644 
2645 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2646 @*/
2647 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2648 {
2649   PetscFunctionBegin;
2650   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2651   PetscFunctionReturn(PETSC_SUCCESS);
2652 }
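
/*
   Usage sketch (hedged; A is assumed to be a MATMPIAIJ):

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));

   The same switch is available at runtime through the option handled in MatSetFromOptions_MPIAIJ() below:
   -mat_increase_overlap_scalable
*/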
2653 
2654 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2655 {
2656   PetscBool sc = PETSC_FALSE, flg;
2657 
2658   PetscFunctionBegin;
2659   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2660   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2661   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2662   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2663   PetscOptionsHeadEnd();
2664   PetscFunctionReturn(PETSC_SUCCESS);
2665 }
2666 
2667 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2668 {
2669   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2670   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2671 
2672   PetscFunctionBegin;
2673   if (!Y->preallocated) {
2674     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2675   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2676     PetscInt nonew = aij->nonew;
2677     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2678     aij->nonew = nonew;
2679   }
2680   PetscCall(MatShift_Basic(Y, a));
2681   PetscFunctionReturn(PETSC_SUCCESS);
2682 }
2683 
2684 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2685 {
2686   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2687 
2688   PetscFunctionBegin;
2689   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2690   PetscCall(MatMissingDiagonal(a->A, missing, d));
2691   if (d) {
2692     PetscInt rstart;
2693     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2694     *d += rstart;
2695   }
2696   PetscFunctionReturn(PETSC_SUCCESS);
2697 }
2698 
2699 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2700 {
2701   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2702 
2703   PetscFunctionBegin;
2704   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2705   PetscFunctionReturn(PETSC_SUCCESS);
2706 }
2707 
2708 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2709 {
2710   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2711 
2712   PetscFunctionBegin;
2713   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2714   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2715   PetscFunctionReturn(PETSC_SUCCESS);
2716 }
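
/*
   Usage sketch (hedged; assumes the public MatEliminateZeros(Mat, PetscBool) interface dispatches to the
   routine above and that A is an assembled MATMPIAIJ containing explicitly stored zeros):

     PetscCall(MatEliminateZeros(A, PETSC_TRUE)); // drop stored zeros but keep zero diagonal coefficients
*/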
2717 
2718 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2719                                        MatGetRow_MPIAIJ,
2720                                        MatRestoreRow_MPIAIJ,
2721                                        MatMult_MPIAIJ,
2722                                        /* 4*/ MatMultAdd_MPIAIJ,
2723                                        MatMultTranspose_MPIAIJ,
2724                                        MatMultTransposeAdd_MPIAIJ,
2725                                        NULL,
2726                                        NULL,
2727                                        NULL,
2728                                        /*10*/ NULL,
2729                                        NULL,
2730                                        NULL,
2731                                        MatSOR_MPIAIJ,
2732                                        MatTranspose_MPIAIJ,
2733                                        /*15*/ MatGetInfo_MPIAIJ,
2734                                        MatEqual_MPIAIJ,
2735                                        MatGetDiagonal_MPIAIJ,
2736                                        MatDiagonalScale_MPIAIJ,
2737                                        MatNorm_MPIAIJ,
2738                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2739                                        MatAssemblyEnd_MPIAIJ,
2740                                        MatSetOption_MPIAIJ,
2741                                        MatZeroEntries_MPIAIJ,
2742                                        /*24*/ MatZeroRows_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        NULL,
2747                                        /*29*/ MatSetUp_MPI_Hash,
2748                                        NULL,
2749                                        NULL,
2750                                        MatGetDiagonalBlock_MPIAIJ,
2751                                        NULL,
2752                                        /*34*/ MatDuplicate_MPIAIJ,
2753                                        NULL,
2754                                        NULL,
2755                                        NULL,
2756                                        NULL,
2757                                        /*39*/ MatAXPY_MPIAIJ,
2758                                        MatCreateSubMatrices_MPIAIJ,
2759                                        MatIncreaseOverlap_MPIAIJ,
2760                                        MatGetValues_MPIAIJ,
2761                                        MatCopy_MPIAIJ,
2762                                        /*44*/ MatGetRowMax_MPIAIJ,
2763                                        MatScale_MPIAIJ,
2764                                        MatShift_MPIAIJ,
2765                                        MatDiagonalSet_MPIAIJ,
2766                                        MatZeroRowsColumns_MPIAIJ,
2767                                        /*49*/ MatSetRandom_MPIAIJ,
2768                                        MatGetRowIJ_MPIAIJ,
2769                                        MatRestoreRowIJ_MPIAIJ,
2770                                        NULL,
2771                                        NULL,
2772                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2773                                        NULL,
2774                                        MatSetUnfactored_MPIAIJ,
2775                                        MatPermute_MPIAIJ,
2776                                        NULL,
2777                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2778                                        MatDestroy_MPIAIJ,
2779                                        MatView_MPIAIJ,
2780                                        NULL,
2781                                        NULL,
2782                                        /*64*/ NULL,
2783                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2784                                        NULL,
2785                                        NULL,
2786                                        NULL,
2787                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2788                                        MatGetRowMinAbs_MPIAIJ,
2789                                        NULL,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*75*/ MatFDColoringApply_AIJ,
2794                                        MatSetFromOptions_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        MatFindZeroDiagonals_MPIAIJ,
2798                                        /*80*/ NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        /*83*/ MatLoad_MPIAIJ,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*89*/ NULL,
2808                                        NULL,
2809                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        NULL,
2816                                        MatBindToCPU_MPIAIJ,
2817                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2818                                        NULL,
2819                                        NULL,
2820                                        MatConjugate_MPIAIJ,
2821                                        NULL,
2822                                        /*104*/ MatSetValuesRow_MPIAIJ,
2823                                        MatRealPart_MPIAIJ,
2824                                        MatImaginaryPart_MPIAIJ,
2825                                        NULL,
2826                                        NULL,
2827                                        /*109*/ NULL,
2828                                        NULL,
2829                                        MatGetRowMin_MPIAIJ,
2830                                        NULL,
2831                                        MatMissingDiagonal_MPIAIJ,
2832                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2833                                        NULL,
2834                                        MatGetGhosts_MPIAIJ,
2835                                        NULL,
2836                                        NULL,
2837                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2838                                        NULL,
2839                                        NULL,
2840                                        NULL,
2841                                        MatGetMultiProcBlock_MPIAIJ,
2842                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2843                                        MatGetColumnReductions_MPIAIJ,
2844                                        MatInvertBlockDiagonal_MPIAIJ,
2845                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2846                                        MatCreateSubMatricesMPI_MPIAIJ,
2847                                        /*129*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2851                                        NULL,
2852                                        /*134*/ NULL,
2853                                        NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        NULL,
2857                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2858                                        NULL,
2859                                        NULL,
2860                                        MatFDColoringSetUp_MPIXAIJ,
2861                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2862                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2863                                        /*145*/ NULL,
2864                                        NULL,
2865                                        NULL,
2866                                        MatCreateGraph_Simple_AIJ,
2867                                        NULL,
2868                                        /*150*/ NULL,
2869                                        MatEliminateZeros_MPIAIJ,
2870                                        MatGetRowSumAbs_MPIAIJ,
2871                                        NULL,
2872                                        NULL,
2873                                        NULL};
2874 
2875 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2876 {
2877   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2878 
2879   PetscFunctionBegin;
2880   PetscCall(MatStoreValues(aij->A));
2881   PetscCall(MatStoreValues(aij->B));
2882   PetscFunctionReturn(PETSC_SUCCESS);
2883 }
2884 
2885 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2886 {
2887   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2888 
2889   PetscFunctionBegin;
2890   PetscCall(MatRetrieveValues(aij->A));
2891   PetscCall(MatRetrieveValues(aij->B));
2892   PetscFunctionReturn(PETSC_SUCCESS);
2893 }
2894 
2895 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2896 {
2897   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2898   PetscMPIInt size;
2899 
2900   PetscFunctionBegin;
2901   if (B->hash_active) {
2902     B->ops[0]      = b->cops;
2903     B->hash_active = PETSC_FALSE;
2904   }
2905   PetscCall(PetscLayoutSetUp(B->rmap));
2906   PetscCall(PetscLayoutSetUp(B->cmap));
2907 
2908 #if defined(PETSC_USE_CTABLE)
2909   PetscCall(PetscHMapIDestroy(&b->colmap));
2910 #else
2911   PetscCall(PetscFree(b->colmap));
2912 #endif
2913   PetscCall(PetscFree(b->garray));
2914   PetscCall(VecDestroy(&b->lvec));
2915   PetscCall(VecScatterDestroy(&b->Mvctx));
2916 
2917   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2918 
2919   MatSeqXAIJGetOptions_Private(b->B);
2920   PetscCall(MatDestroy(&b->B));
2921   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2922   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2923   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2924   PetscCall(MatSetType(b->B, MATSEQAIJ));
2925   MatSeqXAIJRestoreOptions_Private(b->B);
2926 
2927   MatSeqXAIJGetOptions_Private(b->A);
2928   PetscCall(MatDestroy(&b->A));
2929   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2930   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2931   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2932   PetscCall(MatSetType(b->A, MATSEQAIJ));
2933   MatSeqXAIJRestoreOptions_Private(b->A);
2934 
2935   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2936   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2937   B->preallocated  = PETSC_TRUE;
2938   B->was_assembled = PETSC_FALSE;
2939   B->assembled     = PETSC_FALSE;
2940   PetscFunctionReturn(PETSC_SUCCESS);
2941 }
2942 
2943 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2944 {
2945   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2946 
2947   PetscFunctionBegin;
2948   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2949   PetscCall(PetscLayoutSetUp(B->rmap));
2950   PetscCall(PetscLayoutSetUp(B->cmap));
2951   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2952   else {
2953 #if defined(PETSC_USE_CTABLE)
2954     PetscCall(PetscHMapIDestroy(&b->colmap));
2955 #else
2956     PetscCall(PetscFree(b->colmap));
2957 #endif
2958     PetscCall(PetscFree(b->garray));
2959     PetscCall(VecDestroy(&b->lvec));
2960   }
2961   PetscCall(VecScatterDestroy(&b->Mvctx));
2962 
2963   PetscCall(MatResetPreallocation(b->A));
2964   PetscCall(MatResetPreallocation(b->B));
2965   B->preallocated  = PETSC_TRUE;
2966   B->was_assembled = PETSC_FALSE;
2967   B->assembled     = PETSC_FALSE;
2968   PetscFunctionReturn(PETSC_SUCCESS);
2969 }
2970 
2971 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2972 {
2973   Mat         mat;
2974   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2975 
2976   PetscFunctionBegin;
2977   *newmat = NULL;
2978   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2979   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2980   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2981   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2982   a = (Mat_MPIAIJ *)mat->data;
2983 
2984   mat->factortype = matin->factortype;
2985   mat->assembled  = matin->assembled;
2986   mat->insertmode = NOT_SET_VALUES;
2987 
2988   a->size         = oldmat->size;
2989   a->rank         = oldmat->rank;
2990   a->donotstash   = oldmat->donotstash;
2991   a->roworiented  = oldmat->roworiented;
2992   a->rowindices   = NULL;
2993   a->rowvalues    = NULL;
2994   a->getrowactive = PETSC_FALSE;
2995 
2996   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2997   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2998   if (matin->hash_active) {
2999     PetscCall(MatSetUp(mat));
3000   } else {
3001     mat->preallocated = matin->preallocated;
3002     if (oldmat->colmap) {
3003 #if defined(PETSC_USE_CTABLE)
3004       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3005 #else
3006       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3007       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3008 #endif
3009     } else a->colmap = NULL;
3010     if (oldmat->garray) {
3011       PetscInt len;
3012       len = oldmat->B->cmap->n;
3013       PetscCall(PetscMalloc1(len + 1, &a->garray));
3014       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3015     } else a->garray = NULL;
3016 
3017     /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3018       in fact, MatDuplicate() only requires the matrix to be preallocated.
3019       This may happen, for instance, inside DMCreateMatrix_Shell() */
3020     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3021     if (oldmat->Mvctx) {
3022       a->Mvctx = oldmat->Mvctx;
3023       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3024     }
3025     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3026     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3027   }
3028   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3029   *newmat = mat;
3030   PetscFunctionReturn(PETSC_SUCCESS);
3031 }
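
/*
   Usage sketch (hedged; A is assumed to be a preallocated or assembled MATMPIAIJ):

     Mat B;

     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); // B receives A's layout, nonzero structure, and values
     PetscCall(MatDestroy(&B));
*/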
3032 
3033 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3034 {
3035   PetscBool isbinary, ishdf5;
3036 
3037   PetscFunctionBegin;
3038   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3039   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3040   /* force binary viewer to load .info file if it has not yet done so */
3041   PetscCall(PetscViewerSetUp(viewer));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3043   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3044   if (isbinary) {
3045     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3046   } else if (ishdf5) {
3047 #if defined(PETSC_HAVE_HDF5)
3048     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3049 #else
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3051 #endif
3052   } else {
3053     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3054   }
3055   PetscFunctionReturn(PETSC_SUCCESS);
3056 }
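
/*
   Loading sketch (hedged; the file name "matrix.dat" is illustrative and is assumed to have been written
   earlier with MatView() and a binary viewer):

     PetscViewer viewer;
     Mat         A;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer)); // dispatches to MatLoad_MPIAIJ() above
     PetscCall(PetscViewerDestroy(&viewer));
*/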
3057 
3058 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3059 {
3060   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3061   PetscInt    *rowidxs, *colidxs;
3062   PetscScalar *matvals;
3063 
3064   PetscFunctionBegin;
3065   PetscCall(PetscViewerSetUp(viewer));
3066 
3067   /* read in matrix header */
3068   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3069   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3070   M  = header[1];
3071   N  = header[2];
3072   nz = header[3];
3073   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3074   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3075   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3076 
3077   /* set block sizes from the viewer's .info file */
3078   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3079   /* set global sizes if not set already */
3080   if (mat->rmap->N < 0) mat->rmap->N = M;
3081   if (mat->cmap->N < 0) mat->cmap->N = N;
3082   PetscCall(PetscLayoutSetUp(mat->rmap));
3083   PetscCall(PetscLayoutSetUp(mat->cmap));
3084 
3085   /* check if the matrix sizes are correct */
3086   PetscCall(MatGetSize(mat, &rows, &cols));
3087   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3088 
3089   /* read in row lengths and build row indices */
3090   PetscCall(MatGetLocalSize(mat, &m, NULL));
3091   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3092   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3093   rowidxs[0] = 0;
3094   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3095   if (nz != PETSC_INT_MAX) {
3096     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3097     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3098   }
3099 
3100   /* read in column indices and matrix values */
3101   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3102   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3103   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3104   /* store matrix indices and values */
3105   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3106   PetscCall(PetscFree(rowidxs));
3107   PetscCall(PetscFree2(colidxs, matvals));
3108   PetscFunctionReturn(PETSC_SUCCESS);
3109 }
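
/*
   On-disk layout consumed by MatLoad_MPIAIJ_Binary() above (inferred from the reads it performs):

     PetscInt    header[4];  // MAT_FILE_CLASSID, M (global rows), N (global cols), nz (global nonzeros, or PETSC_INT_MAX)
     PetscInt    rowlens[M]; // nonzeros per global row
     PetscInt    cols[nz];   // column indices, stored row by row
     PetscScalar vals[nz];   // numerical values, stored row by row
*/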
3110 
3111 /* Not scalable because of ISAllGather() unless getting all columns. */
3112 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3113 {
3114   IS          iscol_local;
3115   PetscBool   isstride;
3116   PetscMPIInt gisstride = 0;
3117 
3118   PetscFunctionBegin;
3119   /* check if we are grabbing all columns */
3120   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3121 
3122   if (isstride) {
3123     PetscInt start, len, mstart, mlen;
3124     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3125     PetscCall(ISGetLocalSize(iscol, &len));
3126     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3127     if (mstart == start && mlen - mstart == len) gisstride = 1;
3128   }
3129 
3130   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3131   if (gisstride) {
3132     PetscInt N;
3133     PetscCall(MatGetSize(mat, NULL, &N));
3134     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3135     PetscCall(ISSetIdentity(iscol_local));
3136     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3137   } else {
3138     PetscInt cbs;
3139     PetscCall(ISGetBlockSize(iscol, &cbs));
3140     PetscCall(ISAllGather(iscol, &iscol_local));
3141     PetscCall(ISSetBlockSize(iscol_local, cbs));
3142   }
3143 
3144   *isseq = iscol_local;
3145   PetscFunctionReturn(PETSC_SUCCESS);
3146 }
3147 
3148 /*
3149  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3150  (see MatCreateSubMatrix_MPIAIJ_nonscalable())
3151 
3152  Input Parameters:
3153 +   mat - matrix
3154 .   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3155            i.e., mat->rstart <= isrow[i] < mat->rend
3156 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3157            i.e., mat->cstart <= iscol[i] < mat->cend
3158 
3159  Output Parameters:
3160 +   isrow_d - sequential row index set for retrieving mat->A
3161 .   iscol_d - sequential column index set for retrieving mat->A
3162 .   iscol_o - sequential column index set for retrieving mat->B
3163 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3164  */
3165 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3166 {
3167   Vec             x, cmap;
3168   const PetscInt *is_idx;
3169   PetscScalar    *xarray, *cmaparray;
3170   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3171   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3172   Mat             B    = a->B;
3173   Vec             lvec = a->lvec, lcmap;
3174   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3175   MPI_Comm        comm;
3176   VecScatter      Mvctx = a->Mvctx;
3177 
3178   PetscFunctionBegin;
3179   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3180   PetscCall(ISGetLocalSize(iscol, &ncols));
3181 
3182   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3183   PetscCall(MatCreateVecs(mat, &x, NULL));
3184   PetscCall(VecSet(x, -1.0));
3185   PetscCall(VecDuplicate(x, &cmap));
3186   PetscCall(VecSet(cmap, -1.0));
3187 
3188   /* Get start indices */
3189   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3190   isstart -= ncols;
3191   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3192 
3193   PetscCall(ISGetIndices(iscol, &is_idx));
3194   PetscCall(VecGetArray(x, &xarray));
3195   PetscCall(VecGetArray(cmap, &cmaparray));
3196   PetscCall(PetscMalloc1(ncols, &idx));
3197   for (i = 0; i < ncols; i++) {
3198     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3199     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3200     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3201   }
3202   PetscCall(VecRestoreArray(x, &xarray));
3203   PetscCall(VecRestoreArray(cmap, &cmaparray));
3204   PetscCall(ISRestoreIndices(iscol, &is_idx));
3205 
3206   /* Get iscol_d */
3207   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3208   PetscCall(ISGetBlockSize(iscol, &i));
3209   PetscCall(ISSetBlockSize(*iscol_d, i));
3210 
3211   /* Get isrow_d */
3212   PetscCall(ISGetLocalSize(isrow, &m));
3213   rstart = mat->rmap->rstart;
3214   PetscCall(PetscMalloc1(m, &idx));
3215   PetscCall(ISGetIndices(isrow, &is_idx));
3216   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3217   PetscCall(ISRestoreIndices(isrow, &is_idx));
3218 
3219   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3220   PetscCall(ISGetBlockSize(isrow, &i));
3221   PetscCall(ISSetBlockSize(*isrow_d, i));
3222 
3223   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3224   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3225   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3226 
3227   PetscCall(VecDuplicate(lvec, &lcmap));
3228 
3229   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3230   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3231 
3232   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3233   /* off-process column indices */
3234   count = 0;
3235   PetscCall(PetscMalloc1(Bn, &idx));
3236   PetscCall(PetscMalloc1(Bn, &cmap1));
3237 
3238   PetscCall(VecGetArray(lvec, &xarray));
3239   PetscCall(VecGetArray(lcmap, &cmaparray));
3240   for (i = 0; i < Bn; i++) {
3241     if (PetscRealPart(xarray[i]) > -1.0) {
3242       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3243       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3244       count++;
3245     }
3246   }
3247   PetscCall(VecRestoreArray(lvec, &xarray));
3248   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3249 
3250   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3251   /* cannot ensure iscol_o has same blocksize as iscol! */
3252 
3253   PetscCall(PetscFree(idx));
3254   *garray = cmap1;
3255 
3256   PetscCall(VecDestroy(&x));
3257   PetscCall(VecDestroy(&cmap));
3258   PetscCall(VecDestroy(&lcmap));
3259   PetscFunctionReturn(PETSC_SUCCESS);
3260 }
3261 
3262 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3263 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3264 {
3265   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3266   Mat         M = NULL;
3267   MPI_Comm    comm;
3268   IS          iscol_d, isrow_d, iscol_o;
3269   Mat         Asub = NULL, Bsub = NULL;
3270   PetscInt    n;
3271 
3272   PetscFunctionBegin;
3273   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3274 
3275   if (call == MAT_REUSE_MATRIX) {
3276     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3277     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3278     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3279 
3280     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3281     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3282 
3283     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3284     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3285 
3286     /* Update diagonal and off-diagonal portions of submat */
3287     asub = (Mat_MPIAIJ *)(*submat)->data;
3288     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3289     PetscCall(ISGetLocalSize(iscol_o, &n));
3290     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3291     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3292     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3293 
3294   } else { /* call == MAT_INITIAL_MATRIX */
3295     const PetscInt *garray;
3296     PetscInt        BsubN;
3297 
3298     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3299     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3300 
3301     /* Create local submatrices Asub and Bsub */
3302     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3303     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3304 
3305     /* Create submatrix M */
3306     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3307 
3308     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3309     asub = (Mat_MPIAIJ *)M->data;
3310 
3311     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3312     n = asub->B->cmap->N;
3313     if (BsubN > n) {
3314       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3315       const PetscInt *idx;
3316       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3317       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3318 
3319       PetscCall(PetscMalloc1(n, &idx_new));
3320       j = 0;
3321       PetscCall(ISGetIndices(iscol_o, &idx));
3322       for (i = 0; i < n; i++) {
3323         if (j >= BsubN) break;
3324         while (subgarray[i] > garray[j]) j++;
3325 
3326         if (subgarray[i] == garray[j]) {
3327           idx_new[i] = idx[j++];
3328         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3329       }
3330       PetscCall(ISRestoreIndices(iscol_o, &idx));
3331 
3332       PetscCall(ISDestroy(&iscol_o));
3333       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3334 
3335     } else if (BsubN < n) {
3336       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3337     }
3338 
3339     PetscCall(PetscFree(garray));
3340     *submat = M;
3341 
3342     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3343     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3344     PetscCall(ISDestroy(&isrow_d));
3345 
3346     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3347     PetscCall(ISDestroy(&iscol_d));
3348 
3349     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3350     PetscCall(ISDestroy(&iscol_o));
3351   }
3352   PetscFunctionReturn(PETSC_SUCCESS);
3353 }
3354 
3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3356 {
3357   IS        iscol_local = NULL, isrow_d;
3358   PetscInt  csize;
3359   PetscInt  n, i, j, start, end;
3360   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3361   MPI_Comm  comm;
3362 
3363   PetscFunctionBegin;
3364   /* If isrow has same processor distribution as mat,
3365      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3366   if (call == MAT_REUSE_MATRIX) {
3367     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3368     if (isrow_d) {
3369       sameRowDist  = PETSC_TRUE;
3370       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3371     } else {
3372       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3373       if (iscol_local) {
3374         sameRowDist  = PETSC_TRUE;
3375         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3376       }
3377     }
3378   } else {
3379     /* Check if isrow has same processor distribution as mat */
3380     sameDist[0] = PETSC_FALSE;
3381     PetscCall(ISGetLocalSize(isrow, &n));
3382     if (!n) {
3383       sameDist[0] = PETSC_TRUE;
3384     } else {
3385       PetscCall(ISGetMinMax(isrow, &i, &j));
3386       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3387       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3388     }
3389 
3390     /* Check if iscol has same processor distribution as mat */
3391     sameDist[1] = PETSC_FALSE;
3392     PetscCall(ISGetLocalSize(iscol, &n));
3393     if (!n) {
3394       sameDist[1] = PETSC_TRUE;
3395     } else {
3396       PetscCall(ISGetMinMax(iscol, &i, &j));
3397       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3398       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3399     }
3400 
3401     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3402     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3403     sameRowDist = tsameDist[0];
3404   }
3405 
3406   if (sameRowDist) {
3407     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3408       /* isrow and iscol have same processor distribution as mat */
3409       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3410       PetscFunctionReturn(PETSC_SUCCESS);
3411     } else { /* sameRowDist */
3412       /* isrow has same processor distribution as mat */
3413       if (call == MAT_INITIAL_MATRIX) {
3414         PetscBool sorted;
3415         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3416         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3417         PetscCall(ISGetSize(iscol, &i));
3418         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3419 
3420         PetscCall(ISSorted(iscol_local, &sorted));
3421         if (sorted) {
3422           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3423           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3424           PetscFunctionReturn(PETSC_SUCCESS);
3425         }
3426       } else { /* call == MAT_REUSE_MATRIX */
3427         IS iscol_sub;
3428         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3429         if (iscol_sub) {
3430           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3431           PetscFunctionReturn(PETSC_SUCCESS);
3432         }
3433       }
3434     }
3435   }
3436 
3437   /* General case: iscol -> iscol_local which has global size of iscol */
3438   if (call == MAT_REUSE_MATRIX) {
3439     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3440     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3441   } else {
3442     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3443   }
3444 
3445   PetscCall(ISGetLocalSize(iscol, &csize));
3446   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3447 
3448   if (call == MAT_INITIAL_MATRIX) {
3449     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3450     PetscCall(ISDestroy(&iscol_local));
3451   }
3452   PetscFunctionReturn(PETSC_SUCCESS);
3453 }
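
/*
   Calling sketch for the dispatcher above (hedged; the construction of isrow/iscol is application specific):

     IS  isrow, iscol;
     Mat submat;

     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &submat)); // first extraction
     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_REUSE_MATRIX, &submat));   // later refill with the same pattern
*/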
3454 
3455 /*@C
3456   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3457   and "off-diagonal" part of the matrix in CSR format.
3458 
3459   Collective
3460 
3461   Input Parameters:
3462 + comm   - MPI communicator
3463 . A      - "diagonal" portion of matrix
3464 . B      - "off-diagonal" portion of the matrix; it may have empty columns and will be destroyed by this routine
3465 - garray - global index of `B` columns
3466 
3467   Output Parameter:
3468 . mat - the matrix, with input `A` as its local diagonal matrix
3469 
3470   Level: advanced
3471 
3472   Notes:
3473   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3474 
3475   `A` becomes part of the output `mat` and `B` is destroyed by this routine; the user must not use `A` or `B` afterwards.
3476 
3477 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3478 @*/
3479 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3480 {
3481   Mat_MPIAIJ        *maij;
3482   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3483   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3484   const PetscScalar *oa;
3485   Mat                Bnew;
3486   PetscInt           m, n, N;
3487   MatType            mpi_mat_type;
3488 
3489   PetscFunctionBegin;
3490   PetscCall(MatCreate(comm, mat));
3491   PetscCall(MatGetSize(A, &m, &n));
3492   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3493   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3494   /* the check below was removed; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3495   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3496 
3497   /* Get global columns of mat */
3498   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3499 
3500   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3501   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3502   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3503   PetscCall(MatSetType(*mat, mpi_mat_type));
3504 
3505   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3506   maij = (Mat_MPIAIJ *)(*mat)->data;
3507 
3508   (*mat)->preallocated = PETSC_TRUE;
3509 
3510   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3511   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3512 
3513   /* Set A as diagonal portion of *mat */
3514   maij->A = A;
3515 
3516   nz = oi[m];
3517   for (i = 0; i < nz; i++) {
3518     col   = oj[i];
3519     oj[i] = garray[col];
3520   }
3521 
3522   /* Set Bnew as off-diagonal portion of *mat */
3523   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3524   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3525   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3526   bnew        = (Mat_SeqAIJ *)Bnew->data;
3527   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3528   maij->B     = Bnew;
3529 
3530   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3531 
3532   b->free_a  = PETSC_FALSE;
3533   b->free_ij = PETSC_FALSE;
3534   PetscCall(MatDestroy(&B));
3535 
3536   bnew->free_a  = PETSC_TRUE;
3537   bnew->free_ij = PETSC_TRUE;
3538 
3539   /* condense columns of maij->B */
3540   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3541   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3542   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3543   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3544   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3545   PetscFunctionReturn(PETSC_SUCCESS);
3546 }
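
/*
   Minimal calling sketch (hedged; Aloc, Bloc, and garray are assumed to already exist):

     Mat C;

     // Aloc: assembled MATSEQAIJ holding this rank's "diagonal" block
     // Bloc: assembled MATSEQAIJ holding the "off-diagonal" block, with garray[j] the global column of Bloc column j
     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Aloc, Bloc, garray, &C));
     // Aloc is now owned by C and Bloc has been destroyed; neither may be used afterwards
*/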
3547 
3548 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3549 
3550 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3551 {
3552   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3553   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3554   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3555   Mat             M, Msub, B = a->B;
3556   MatScalar      *aa;
3557   Mat_SeqAIJ     *aij;
3558   PetscInt       *garray = a->garray, *colsub, Ncols;
3559   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3560   IS              iscol_sub, iscmap;
3561   const PetscInt *is_idx, *cmap;
3562   PetscBool       allcolumns = PETSC_FALSE;
3563   MPI_Comm        comm;
3564 
3565   PetscFunctionBegin;
3566   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3567   if (call == MAT_REUSE_MATRIX) {
3568     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3569     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3570     PetscCall(ISGetLocalSize(iscol_sub, &count));
3571 
3572     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3573     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3574 
3575     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3576     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3577 
3578     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3579 
3580   } else { /* call == MAT_INITIAL_MATRIX */
3581     PetscBool flg;
3582 
3583     PetscCall(ISGetLocalSize(iscol, &n));
3584     PetscCall(ISGetSize(iscol, &Ncols));
3585 
3586     /* (1) iscol -> nonscalable iscol_local */
3587     /* Check for special case: each processor gets entire matrix columns */
3588     PetscCall(ISIdentity(iscol_local, &flg));
3589     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3590     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3591     if (allcolumns) {
3592       iscol_sub = iscol_local;
3593       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3594       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3595 
3596     } else {
3597       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3598       PetscInt *idx, *cmap1, k;
3599       PetscCall(PetscMalloc1(Ncols, &idx));
3600       PetscCall(PetscMalloc1(Ncols, &cmap1));
3601       PetscCall(ISGetIndices(iscol_local, &is_idx));
3602       count = 0;
3603       k     = 0;
3604       for (i = 0; i < Ncols; i++) {
3605         j = is_idx[i];
3606         if (j >= cstart && j < cend) {
3607           /* diagonal part of mat */
3608           idx[count]     = j;
3609           cmap1[count++] = i; /* column index in submat */
3610         } else if (Bn) {
3611           /* off-diagonal part of mat */
3612           if (j == garray[k]) {
3613             idx[count]     = j;
3614             cmap1[count++] = i; /* column index in submat */
3615           } else if (j > garray[k]) {
3616             while (j > garray[k] && k < Bn - 1) k++;
3617             if (j == garray[k]) {
3618               idx[count]     = j;
3619               cmap1[count++] = i; /* column index in submat */
3620             }
3621           }
3622         }
3623       }
3624       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3625 
3626       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3627       PetscCall(ISGetBlockSize(iscol, &cbs));
3628       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3629 
3630       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3631     }
3632 
3633     /* (3) Create sequential Msub */
3634     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3635   }
3636 
3637   PetscCall(ISGetLocalSize(iscol_sub, &count));
3638   aij = (Mat_SeqAIJ *)Msub->data;
3639   ii  = aij->i;
3640   PetscCall(ISGetIndices(iscmap, &cmap));
3641 
3642   /*
3643       m - number of local rows
3644       Ncols - number of columns (same on all processors)
3645       rstart - first row in new global matrix generated
3646   */
3647   PetscCall(MatGetSize(Msub, &m, NULL));
3648 
3649   if (call == MAT_INITIAL_MATRIX) {
3650     /* (4) Create parallel newmat */
3651     PetscMPIInt rank, size;
3652     PetscInt    csize;
3653 
3654     PetscCallMPI(MPI_Comm_size(comm, &size));
3655     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3656 
3657     /*
3658         Determine the number of non-zeros in the diagonal and off-diagonal
3659         portions of the matrix in order to do correct preallocation
3660     */
3661 
3662     /* first get start and end of "diagonal" columns */
3663     PetscCall(ISGetLocalSize(iscol, &csize));
3664     if (csize == PETSC_DECIDE) {
3665       PetscCall(ISGetSize(isrow, &mglobal));
3666       if (mglobal == Ncols) { /* square matrix */
3667         nlocal = m;
3668       } else {
3669         nlocal = Ncols / size + ((Ncols % size) > rank);
3670       }
3671     } else {
3672       nlocal = csize;
3673     }
3674     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3675     rstart = rend - nlocal;
3676     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3677 
3678     /* next, compute all the lengths */
3679     jj = aij->j;
3680     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3681     olens = dlens + m;
3682     for (i = 0; i < m; i++) {
3683       jend = ii[i + 1] - ii[i];
3684       olen = 0;
3685       dlen = 0;
3686       for (j = 0; j < jend; j++) {
3687         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3688         else dlen++;
3689         jj++;
3690       }
3691       olens[i] = olen;
3692       dlens[i] = dlen;
3693     }
3694 
3695     PetscCall(ISGetBlockSize(isrow, &bs));
3696     PetscCall(ISGetBlockSize(iscol, &cbs));
3697 
3698     PetscCall(MatCreate(comm, &M));
3699     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3700     PetscCall(MatSetBlockSizes(M, bs, cbs));
3701     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3702     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3703     PetscCall(PetscFree(dlens));
3704 
3705   } else { /* call == MAT_REUSE_MATRIX */
3706     M = *newmat;
3707     PetscCall(MatGetLocalSize(M, &i, NULL));
3708     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3709     PetscCall(MatZeroEntries(M));
3710     /*
3711          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3712        rather than the slower MatSetValues().
3713     */
3714     M->was_assembled = PETSC_TRUE;
3715     M->assembled     = PETSC_FALSE;
3716   }
3717 
3718   /* (5) Set values of Msub to *newmat */
3719   PetscCall(PetscMalloc1(count, &colsub));
3720   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3721 
3722   jj = aij->j;
3723   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3724   for (i = 0; i < m; i++) {
3725     row = rstart + i;
3726     nz  = ii[i + 1] - ii[i];
3727     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3728     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3729     jj += nz;
3730     aa += nz;
3731   }
3732   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3733   PetscCall(ISRestoreIndices(iscmap, &cmap));
3734 
3735   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3736   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3737 
3738   PetscCall(PetscFree(colsub));
3739 
3740   /* save Msub, iscol_sub and iscmap used in processor for next request */
3741   if (call == MAT_INITIAL_MATRIX) {
3742     *newmat = M;
3743     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3744     PetscCall(MatDestroy(&Msub));
3745 
3746     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3747     PetscCall(ISDestroy(&iscol_sub));
3748 
3749     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3750     PetscCall(ISDestroy(&iscmap));
3751 
3752     if (iscol_local) {
3753       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3754       PetscCall(ISDestroy(&iscol_local));
3755     }
3756   }
3757   PetscFunctionReturn(PETSC_SUCCESS);
3758 }
3759 
3760 /*
3761     Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
3762   and then the end result by concatenating the local matrices. Writing it directly
3763   would be much like MatCreateSubMatrices_MPIAIJ().
3764 
3765   This requires a sequential iscol with all indices.
3766 */
3767 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3768 {
3769   PetscMPIInt rank, size;
3770   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3771   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3772   Mat         M, Mreuse;
3773   MatScalar  *aa, *vwork;
3774   MPI_Comm    comm;
3775   Mat_SeqAIJ *aij;
3776   PetscBool   colflag, allcolumns = PETSC_FALSE;
3777 
3778   PetscFunctionBegin;
3779   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3780   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3781   PetscCallMPI(MPI_Comm_size(comm, &size));
3782 
3783   /* Check for special case: each processor gets entire matrix columns */
3784   PetscCall(ISIdentity(iscol, &colflag));
3785   PetscCall(ISGetLocalSize(iscol, &n));
3786   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3787   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3788 
3789   if (call == MAT_REUSE_MATRIX) {
3790     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3791     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3792     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3793   } else {
3794     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3795   }
3796 
3797   /*
3798       m - number of local rows
3799       n - number of columns (same on all processors)
3800       rstart - first row in new global matrix generated
3801   */
3802   PetscCall(MatGetSize(Mreuse, &m, &n));
3803   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3804   if (call == MAT_INITIAL_MATRIX) {
3805     aij = (Mat_SeqAIJ *)Mreuse->data;
3806     ii  = aij->i;
3807     jj  = aij->j;
3808 
3809     /*
3810         Determine the number of non-zeros in the diagonal and off-diagonal
3811         portions of the matrix in order to do correct preallocation
3812     */
3813 
3814     /* first get start and end of "diagonal" columns */
3815     if (csize == PETSC_DECIDE) {
3816       PetscCall(ISGetSize(isrow, &mglobal));
3817       if (mglobal == n) { /* square matrix */
3818         nlocal = m;
3819       } else {
3820         nlocal = n / size + ((n % size) > rank);
3821       }
3822     } else {
3823       nlocal = csize;
3824     }
3825     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3826     rstart = rend - nlocal;
3827     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3828 
3829     /* next, compute all the lengths */
3830     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3831     olens = dlens + m;
3832     for (i = 0; i < m; i++) {
3833       jend = ii[i + 1] - ii[i];
3834       olen = 0;
3835       dlen = 0;
3836       for (j = 0; j < jend; j++) {
3837         if (*jj < rstart || *jj >= rend) olen++;
3838         else dlen++;
3839         jj++;
3840       }
3841       olens[i] = olen;
3842       dlens[i] = dlen;
3843     }
3844     PetscCall(MatCreate(comm, &M));
3845     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3846     PetscCall(MatSetBlockSizes(M, bs, cbs));
3847     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3848     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3849     PetscCall(PetscFree(dlens));
3850   } else {
3851     PetscInt ml, nl;
3852 
3853     M = *newmat;
3854     PetscCall(MatGetLocalSize(M, &ml, &nl));
3855     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3856     PetscCall(MatZeroEntries(M));
3857     /*
3858          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3859        rather than the slower MatSetValues().
3860     */
3861     M->was_assembled = PETSC_TRUE;
3862     M->assembled     = PETSC_FALSE;
3863   }
3864   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3865   aij = (Mat_SeqAIJ *)Mreuse->data;
3866   ii  = aij->i;
3867   jj  = aij->j;
3868 
3869   /* trigger copy to CPU if needed */
3870   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3871   for (i = 0; i < m; i++) {
3872     row   = rstart + i;
3873     nz    = ii[i + 1] - ii[i];
3874     cwork = jj;
3875     jj    = PetscSafePointerPlusOffset(jj, nz);
3876     vwork = aa;
3877     aa    = PetscSafePointerPlusOffset(aa, nz);
3878     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3879   }
3880   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3881 
3882   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3883   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3884   *newmat = M;
3885 
3886   /* save submatrix used on this process for the next request */
3887   if (call == MAT_INITIAL_MATRIX) {
3888     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3889     PetscCall(MatDestroy(&Mreuse));
3890   }
3891   PetscFunctionReturn(PETSC_SUCCESS);
3892 }
3893 
3894 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3895 {
3896   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3897   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3898   const PetscInt *JJ;
3899   PetscBool       nooffprocentries;
3900   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3901 
3902   PetscFunctionBegin;
3903   PetscCall(PetscLayoutSetUp(B->rmap));
3904   PetscCall(PetscLayoutSetUp(B->cmap));
3905   m       = B->rmap->n;
3906   cstart  = B->cmap->rstart;
3907   cend    = B->cmap->rend;
3908   rstart  = B->rmap->rstart;
3909   irstart = Ii[0];
3910 
3911   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3912 
3913   if (PetscDefined(USE_DEBUG)) {
3914     for (i = 0; i < m; i++) {
3915       nnz = Ii[i + 1] - Ii[i];
3916       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3917       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3918       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3919       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3920     }
3921   }
3922 
3923   for (i = 0; i < m; i++) {
3924     nnz     = Ii[i + 1] - Ii[i];
3925     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3926     nnz_max = PetscMax(nnz_max, nnz);
3927     d       = 0;
3928     for (j = 0; j < nnz; j++) {
3929       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3930     }
3931     d_nnz[i] = d;
3932     o_nnz[i] = nnz - d;
3933   }
3934   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3935   PetscCall(PetscFree2(d_nnz, o_nnz));
3936 
3937   for (i = 0; i < m; i++) {
3938     ii = i + rstart;
3939     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3940   }
3941   nooffprocentries    = B->nooffprocentries;
3942   B->nooffprocentries = PETSC_TRUE;
3943   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3944   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3945   B->nooffprocentries = nooffprocentries;
3946 
3947   /* count number of entries below block diagonal */
3948   PetscCall(PetscFree(Aij->ld));
3949   PetscCall(PetscCalloc1(m, &ld));
3950   Aij->ld = ld;
3951   for (i = 0; i < m; i++) {
3952     nnz = Ii[i + 1] - Ii[i];
3953     j   = 0;
3954     while (j < nnz && J[j] < cstart) j++;
3955     ld[i] = j;
3956     if (J) J += nnz;
3957   }
3958 
3959   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3960   PetscFunctionReturn(PETSC_SUCCESS);
3961 }
3962 
3963 /*@
3964   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3965   (the default parallel PETSc format).
3966 
3967   Collective
3968 
3969   Input Parameters:
3970 + B - the matrix
3971 . i - the indices into `j` for the start of each local row (indices start with zero)
3972 . j - the column indices for each local row (indices start with zero)
3973 - v - optional values in the matrix
3974 
3975   Level: developer
3976 
3977   Notes:
3978   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3979   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3980   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3981 
3982   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
3983 
3984   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3985 
3986   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3987 
3988   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3989   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3990 
3991   The format used for the sparse matrix input is equivalent to a
3992   row-major ordering, i.e., for the following matrix, the input data expected is
3993   as shown
3994 .vb
3995         1 0 0
3996         2 0 3     P0
3997        -------
3998         4 5 6     P1
3999 
4000      Process0 [P0] rows_owned=[0,1]
4001         i =  {0,1,3}  [size = nrow+1  = 2+1]
4002         j =  {0,0,2}  [size = 3]
4003         v =  {1,2,3}  [size = 3]
4004 
4005      Process1 [P1] rows_owned=[2]
4006         i =  {0,3}    [size = nrow+1  = 1+1]
4007         j =  {0,1,2}  [size = 3]
4008         v =  {4,5,6}  [size = 3]
4009 .ve
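
  As a sketch only, the arrays for rank 0 (P0) of the two-process example above could be set up and
  passed as follows; the matrix `B` is assumed to have already been created, sized with 2 local rows,
  and given the type `MATMPIAIJ`
.vb
     PetscInt    i[] = {0, 1, 3};       // row offsets: row 0 has 1 entry, row 1 has 2 entries
     PetscInt    j[] = {0, 0, 2};       // global column indices for each local row
     PetscScalar v[] = {1.0, 2.0, 3.0}; // the values 1, 2, 3 owned by rank 0

     MatMPIAIJSetPreallocationCSR(B, i, j, v);
.ve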
4010 
4011 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4012           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4013 @*/
4014 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4015 {
4016   PetscFunctionBegin;
4017   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4018   PetscFunctionReturn(PETSC_SUCCESS);
4019 }
4020 
4021 /*@
4022   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4023   (the default parallel PETSc format).  For good matrix assembly performance
4024   the user should preallocate the matrix storage by setting the parameters
4025   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4026 
4027   Collective
4028 
4029   Input Parameters:
4030 + B     - the matrix
4031 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4032            (same value is used for all local rows)
4033 . d_nnz - array containing the number of nonzeros in the various rows of the
4034            DIAGONAL portion of the local submatrix (possibly different for each row)
4035            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4036            The size of this array is equal to the number of local rows, i.e., 'm'.
4037            For matrices that will be factored, you must leave room for (and set)
4038            the diagonal entry even if it is zero.
4039 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4040            submatrix (same value is used for all local rows).
4041 - o_nnz - array containing the number of nonzeros in the various rows of the
4042            OFF-DIAGONAL portion of the local submatrix (possibly different for
4043            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4044            structure. The size of this array is equal to the number
4045            of local rows, i.e., 'm'.
4046 
4047   Example Usage:
4048   Consider the following 8x8 matrix with 34 non-zero values, that is
4049   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4050   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4051   as follows
4052 
4053 .vb
4054             1  2  0  |  0  3  0  |  0  4
4055     Proc0   0  5  6  |  7  0  0  |  8  0
4056             9  0 10  | 11  0  0  | 12  0
4057     -------------------------------------
4058            13  0 14  | 15 16 17  |  0  0
4059     Proc1   0 18  0  | 19 20 21  |  0  0
4060             0  0  0  | 22 23  0  | 24  0
4061     -------------------------------------
4062     Proc2  25 26 27  |  0  0 28  | 29  0
4063            30  0  0  | 31 32 33  |  0 34
4064 .ve
4065 
4066   This can be represented as a collection of submatrices as
4067 .vb
4068       A B C
4069       D E F
4070       G H I
4071 .ve
4072 
4073   Where the submatrices A,B,C are owned by proc0, D,E,F are
4074   owned by proc1, G,H,I are owned by proc2.
4075 
4076   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4077   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4078   The 'M','N' parameters are 8,8, and have the same values on all procs.
4079 
4080   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4081   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4082   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4083   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4084   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4085   matrix, and [DF] as another `MATSEQAIJ` matrix.
4086 
4087   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4088   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4089   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4090   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4091   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4092   In this case, the values of `d_nz`, `o_nz` are
4093 .vb
4094      proc0  dnz = 2, o_nz = 2
4095      proc1  dnz = 3, o_nz = 2
4096      proc2  dnz = 1, o_nz = 4
4097 .ve
4098   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4099   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4100   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4101   34 values.
4102 
4103   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4104   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4105   In the above case the values for `d_nnz`, `o_nnz` are
4106 .vb
4107      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4108      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4109      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4110 .ve
4111   Here the space allocated is the sum of all the above values, i.e., 34, and
4112   hence the preallocation is perfect.
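
  As a sketch, proc1 in the example above could pass these per-row counts as follows, assuming its
  matrix `B` has already been created with 3 local rows and given the type `MATMPIAIJ`
.vb
     PetscInt d_nnz[] = {3, 3, 2}; // nonzeros per local row in the DIAGONAL block [E]
     PetscInt o_nnz[] = {2, 1, 1}; // nonzeros per local row in the OFF-DIAGONAL block [DF]

     MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
.ve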
4113 
4114   Level: intermediate
4115 
4116   Notes:
4117   If the *_nnz parameter is given then the *_nz parameter is ignored
4118 
4119   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4120   storage.  The stored row and column indices begin with zero.
4121   See [Sparse Matrices](sec_matsparse) for details.
4122 
4123   The parallel matrix is partitioned such that the first m0 rows belong to
4124   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4125   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4126 
4127   The DIAGONAL portion of the local submatrix of a processor can be defined
4128   as the submatrix which is obtained by extracting the part corresponding to
4129   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4130   first row that belongs to the processor, r2 is the last row belonging to
4131   this processor, and c1-c2 is the range of indices of the local part of a
4132   vector suitable for applying the matrix to. This is an m x n matrix. In the
4133   common case of a square matrix, the row and column ranges are the same and
4134   the DIAGONAL part is also square. The remaining portion of the local
4135   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4136 
4137   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4138 
4139   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4140   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4141   You can also run with the option `-info` and look for messages with the string
4142   malloc in them to see if additional memory allocation was needed.
4143 
4144 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4145           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4146 @*/
4147 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4148 {
4149   PetscFunctionBegin;
4150   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4151   PetscValidType(B, 1);
4152   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4153   PetscFunctionReturn(PETSC_SUCCESS);
4154 }
4155 
4156 /*@
4157   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4158   rows in standard CSR format.
4159 
4160   Collective
4161 
4162   Input Parameters:
4163 + comm - MPI communicator
4164 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4165 . n    - This value should be the same as the local size used in creating the
4166          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
4167          calculated if `N` is given). For square matrices n is almost always `m`.
4168 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4169 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4170 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4171 . j    - global column indices
4172 - a    - optional matrix values
4173 
4174   Output Parameter:
4175 . mat - the matrix
4176 
4177   Level: intermediate
4178 
4179   Notes:
4180   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4181   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4182   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4183 
4184   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
4185 
4186   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4187 
4188   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4189   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4190 
4191   The format used for the sparse matrix input is equivalent to a
4192   row-major ordering, i.e., for the following matrix, the input data expected is
4193   as shown
4194 .vb
4195         1 0 0
4196         2 0 3     P0
4197        -------
4198         4 5 6     P1
4199 
4200      Process0 [P0] rows_owned=[0,1]
4201         i =  {0,1,3}  [size = nrow+1  = 2+1]
4202         j =  {0,0,2}  [size = 3]
4203         v =  {1,2,3}  [size = 3]
4204 
4205      Process1 [P1] rows_owned=[2]
4206         i =  {0,3}    [size = nrow+1  = 1+1]
4207         j =  {0,1,2}  [size = 3]
4208         v =  {4,5,6}  [size = 3]
4209 .ve
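
  As a sketch, the call made by rank 0 (P0) of the two-process example above might look as follows;
  the communicator `comm` is assumed to contain both ranks, and rank 1 makes the matching call with
  its own arrays
.vb
     PetscInt    i[] = {0, 1, 3};
     PetscInt    j[] = {0, 0, 2};
     PetscScalar v[] = {1.0, 2.0, 3.0};
     Mat         A;

     MatCreateMPIAIJWithArrays(comm, 2, PETSC_DECIDE, 3, 3, i, j, v, &A);
.ve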
4210 
4211 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4212           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4213 @*/
4214 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4215 {
4216   PetscFunctionBegin;
4217   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4218   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4219   PetscCall(MatCreate(comm, mat));
4220   PetscCall(MatSetSizes(*mat, m, n, M, N));
4221   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4222   PetscCall(MatSetType(*mat, MATMPIAIJ));
4223   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4224   PetscFunctionReturn(PETSC_SUCCESS);
4225 }
4226 
4227 /*@
4228   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
4229   rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4230   to `MatCreateMPIAIJWithArrays()`
4231 
4232   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4233 
4234   Collective
4235 
4236   Input Parameters:
4237 + mat - the matrix
4238 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4239 . n   - This value should be the same as the local size used in creating the
4240        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4241        calculated if N is given) For square matrices n is almost always m.
4242 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4243 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4244 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4245 . J   - column indices
4246 - v   - matrix values
4247 
4248   Level: deprecated
4249 
4250 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4251           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4252 @*/
4253 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4254 {
4255   PetscInt        nnz, i;
4256   PetscBool       nooffprocentries;
4257   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4258   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4259   PetscScalar    *ad, *ao;
4260   PetscInt        ldi, Iii, md;
4261   const PetscInt *Adi = Ad->i;
4262   PetscInt       *ld  = Aij->ld;
4263 
4264   PetscFunctionBegin;
4265   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4266   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4267   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4268   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4269 
4270   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4271   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4272 
4273   for (i = 0; i < m; i++) {
4274     if (PetscDefined(USE_DEBUG)) {
4275       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4276         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4277         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4278       }
4279     }
4280     nnz = Ii[i + 1] - Ii[i];
4281     Iii = Ii[i];
4282     ldi = ld[i];
4283     md  = Adi[i + 1] - Adi[i];
4284     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4285     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4286     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4287     ad += md;
4288     ao += nnz - md;
4289   }
4290   nooffprocentries      = mat->nooffprocentries;
4291   mat->nooffprocentries = PETSC_TRUE;
4292   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4293   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4294   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4295   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4296   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4297   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4298   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4299   mat->nooffprocentries = nooffprocentries;
4300   PetscFunctionReturn(PETSC_SUCCESS);
4301 }
4302 
4303 /*@
4304   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4305 
4306   Collective
4307 
4308   Input Parameters:
4309 + mat - the matrix
4310 - v   - matrix values, stored by row
4311 
4312   Level: intermediate
4313 
4314   Notes:
4315   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4316 
4317   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
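
  As a sketch, assuming `A` was created with the rank-0 CSR arrays shown in the example of
  `MatCreateMPIAIJWithArrays()` (three nonzeros on this rank, stored by row), the values could be
  replaced with
.vb
     PetscScalar vnew[] = {10.0, 20.0, 30.0}; // new values for this rank's nonzeros, in row order

     MatUpdateMPIAIJWithArray(A, vnew);
.ve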
4318 
4319 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4320           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4321 @*/
4322 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4323 {
4324   PetscInt        nnz, i, m;
4325   PetscBool       nooffprocentries;
4326   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4327   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4328   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4329   PetscScalar    *ad, *ao;
4330   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4331   PetscInt        ldi, Iii, md;
4332   PetscInt       *ld = Aij->ld;
4333 
4334   PetscFunctionBegin;
4335   m = mat->rmap->n;
4336 
4337   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4338   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4339   Iii = 0;
4340   for (i = 0; i < m; i++) {
4341     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4342     ldi = ld[i];
4343     md  = Adi[i + 1] - Adi[i];
4344     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4345     ad += md;
4346     if (ao) {
4347       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4348       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4349       ao += nnz - md;
4350     }
4351     Iii += nnz;
4352   }
4353   nooffprocentries      = mat->nooffprocentries;
4354   mat->nooffprocentries = PETSC_TRUE;
4355   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4356   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4357   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4358   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4359   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4360   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4361   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4362   mat->nooffprocentries = nooffprocentries;
4363   PetscFunctionReturn(PETSC_SUCCESS);
4364 }
4365 
4366 /*@
4367   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4368   (the default parallel PETSc format).  For good matrix assembly performance
4369   the user should preallocate the matrix storage by setting the parameters
4370   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4371 
4372   Collective
4373 
4374   Input Parameters:
4375 + comm  - MPI communicator
4376 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4377           This value should be the same as the local size used in creating the
4378           y vector for the matrix-vector product y = Ax.
4379 . n     - This value should be the same as the local size used in creating the
4380           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4381           calculated if N is given) For square matrices n is almost always m.
4382 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4383 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4384 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4385           (same value is used for all local rows)
4386 . d_nnz - array containing the number of nonzeros in the various rows of the
4387           DIAGONAL portion of the local submatrix (possibly different for each row)
4388           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4389           The size of this array is equal to the number of local rows, i.e., 'm'.
4390 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4391           submatrix (same value is used for all local rows).
4392 - o_nnz - array containing the number of nonzeros in the various rows of the
4393           OFF-DIAGONAL portion of the local submatrix (possibly different for
4394           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4395           structure. The size of this array is equal to the number
4396           of local rows, i.e., 'm'.
4397 
4398   Output Parameter:
4399 . A - the matrix
4400 
4401   Options Database Keys:
4402 + -mat_no_inode                     - Do not use inodes
4403 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4404 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4405                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4406                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4407 
4408   Level: intermediate
4409 
4410   Notes:
4411   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4412   MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
4413   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4414 
4415   If the *_nnz parameter is given then the *_nz parameter is ignored
4416 
4417   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4418   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4419   storage requirements for this matrix.
4420 
4421   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4422   processor then it must be used on all processors that share the object for
4423   that argument.
4424 
4425   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4426   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4427 
4428   The user MUST specify either the local or global matrix dimensions
4429   (possibly both).
4430 
4431   The parallel matrix is partitioned across processors such that the
4432   first `m0` rows belong to process 0, the next `m1` rows belong to
4433   process 1, the next `m2` rows belong to process 2, etc., where
4434   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4435   values corresponding to an [m x N] submatrix.
4436 
4437   The columns are logically partitioned with the first n0 columns belonging
4438   to the 0th partition, the next n1 columns belonging to the next
4439   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4440 
4441   The DIAGONAL portion of the local submatrix on any given processor
4442   is the submatrix corresponding to the rows and columns m,n
4443   owned by the given processor, i.e., the diagonal submatrix on
4444   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4445   etc. The remaining portion of the local submatrix [m x (N-n)]
4446   constitutes the OFF-DIAGONAL portion. The example below better
4447   illustrates this concept. The two matrices, the DIAGONAL portion and
4448   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4449 
4450   For a square global matrix we define each processor's diagonal portion
4451   to be its local rows and the corresponding columns (a square submatrix);
4452   each processor's off-diagonal portion encompasses the remainder of the
4453   local matrix (a rectangular submatrix).
4454 
4455   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4456 
4457   When calling this routine with a single process communicator, a matrix of
4458   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4459   type of communicator, use the construction mechanism
4460 .vb
4461   MatCreate(..., &A);
4462   MatSetType(A, MATMPIAIJ);
4463   MatSetSizes(A, m, n, M, N);
4464   MatMPIAIJSetPreallocation(A, ...);
4465 .ve
4466 
4467   By default, this format uses inodes (identical nodes) when possible.
4468   We search for consecutive rows with the same nonzero structure, thereby
4469   reusing matrix information to achieve increased efficiency.
4470 
4471   Example Usage:
4472   Consider the following 8x8 matrix with 34 non-zero values, that is
4473   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4474   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4475   as follows
4476 
4477 .vb
4478             1  2  0  |  0  3  0  |  0  4
4479     Proc0   0  5  6  |  7  0  0  |  8  0
4480             9  0 10  | 11  0  0  | 12  0
4481     -------------------------------------
4482            13  0 14  | 15 16 17  |  0  0
4483     Proc1   0 18  0  | 19 20 21  |  0  0
4484             0  0  0  | 22 23  0  | 24  0
4485     -------------------------------------
4486     Proc2  25 26 27  |  0  0 28  | 29  0
4487            30  0  0  | 31 32 33  |  0 34
4488 .ve
4489 
4490   This can be represented as a collection of submatrices as
4491 
4492 .vb
4493       A B C
4494       D E F
4495       G H I
4496 .ve
4497 
4498   Where the submatrices A,B,C are owned by proc0, D,E,F are
4499   owned by proc1, G,H,I are owned by proc2.
4500 
4501   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4502   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4503   The 'M','N' parameters are 8,8, and have the same values on all procs.
4504 
4505   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4506   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4507   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4508   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4509   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4510   matrix, and [DF] as another `MATSEQAIJ` matrix.
4511 
4512   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4513   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4514   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4515   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4516   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4517   In this case, the values of `d_nz`,`o_nz` are
4518 .vb
4519      proc0  dnz = 2, o_nz = 2
4520      proc1  dnz = 3, o_nz = 2
4521      proc2  dnz = 1, o_nz = 4
4522 .ve
4523   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4524   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4525   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4526   34 values.
4527 
4528   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4529   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4530   In the above case the values for `d_nnz`, `o_nnz` are
4531 .vb
4532      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4533      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4534      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4535 .ve
4536   Here the space allocated is the sum of all the above values, i.e., 34, and
4537   hence the preallocation is perfect.
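
  As a sketch, proc1 of the example above could create the matrix with the per-row counts computed
  there; the communicator `comm` is assumed to contain all three processes, each of which makes the
  call with its own local sizes and counts
.vb
     PetscInt d_nnz[] = {3, 3, 2}; // nonzeros per local row in the DIAGONAL block [E]
     PetscInt o_nnz[] = {2, 1, 1}; // nonzeros per local row in the OFF-DIAGONAL block [DF]
     Mat      A;

     MatCreateAIJ(comm, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve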
4538 
4539 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4540           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4541           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4542 @*/
4543 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4544 {
4545   PetscMPIInt size;
4546 
4547   PetscFunctionBegin;
4548   PetscCall(MatCreate(comm, A));
4549   PetscCall(MatSetSizes(*A, m, n, M, N));
4550   PetscCallMPI(MPI_Comm_size(comm, &size));
4551   if (size > 1) {
4552     PetscCall(MatSetType(*A, MATMPIAIJ));
4553     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4554   } else {
4555     PetscCall(MatSetType(*A, MATSEQAIJ));
4556     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4557   }
4558   PetscFunctionReturn(PETSC_SUCCESS);
4559 }
4560 
4561 /*MC
4562     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4563 
4564     Synopsis:
4565     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4566 
4567     Not Collective
4568 
4569     Input Parameter:
4570 .   A - the `MATMPIAIJ` matrix
4571 
4572     Output Parameters:
4573 +   Ad - the diagonal portion of the matrix
4574 .   Ao - the off-diagonal portion of the matrix
4575 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4576 -   ierr - error code
4577 
4578      Level: advanced
4579 
4580     Note:
4581     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4582 
4583 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4584 M*/
4585 
4586 /*MC
4587     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4588 
4589     Synopsis:
4590     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4591 
4592     Not Collective
4593 
4594     Input Parameters:
4595 +   A - the `MATMPIAIJ` matrix
4596 .   Ad - the diagonal portion of the matrix
4597 .   Ao - the off-diagonal portion of the matrix
4598 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4599 -   ierr - error code
4600 
4601      Level: advanced
4602 
4603 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4604 M*/
4605 
4606 /*@C
4607   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4608 
4609   Not Collective
4610 
4611   Input Parameter:
4612 . A - The `MATMPIAIJ` matrix
4613 
4614   Output Parameters:
4615 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4616 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4617 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4618 
4619   Level: intermediate
4620 
4621   Note:
4622   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4623   in `Ad` are in [0, Nc), where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4624   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array `colmap` maps these
4625   local column numbers to global column numbers in the original matrix.
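
  As a sketch, the local blocks of an assembled `MATMPIAIJ` matrix `A` (assumed to already exist)
  can be accessed with
.vb
     Mat             Ad, Ao;
     const PetscInt *colmap;

     MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
     // local column c of Ao corresponds to global column colmap[c] of A
.ve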
4626 
4627   Fortran Notes:
4628   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4629 
4630 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4631 @*/
4632 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4633 {
4634   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4635   PetscBool   flg;
4636 
4637   PetscFunctionBegin;
4638   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4639   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4640   if (Ad) *Ad = a->A;
4641   if (Ao) *Ao = a->B;
4642   if (colmap) *colmap = a->garray;
4643   PetscFunctionReturn(PETSC_SUCCESS);
4644 }
4645 
4646 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4647 {
4648   PetscInt     m, N, i, rstart, nnz, Ii;
4649   PetscInt    *indx;
4650   PetscScalar *values;
4651   MatType      rootType;
4652 
4653   PetscFunctionBegin;
4654   PetscCall(MatGetSize(inmat, &m, &N));
4655   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4656     PetscInt *dnz, *onz, sum, bs, cbs;
4657 
4658     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4659     /* Check sum(n) = N */
4660     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4661     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4662 
4663     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4664     rstart -= m;
4665 
4666     MatPreallocateBegin(comm, m, n, dnz, onz);
4667     for (i = 0; i < m; i++) {
4668       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4669       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4670       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4671     }
4672 
4673     PetscCall(MatCreate(comm, outmat));
4674     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4675     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4676     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4677     PetscCall(MatGetRootType_Private(inmat, &rootType));
4678     PetscCall(MatSetType(*outmat, rootType));
4679     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4680     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4681     MatPreallocateEnd(dnz, onz);
4682     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4683   }
4684 
4685   /* numeric phase */
4686   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4687   for (i = 0; i < m; i++) {
4688     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4689     Ii = i + rstart;
4690     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4691     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4692   }
4693   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4694   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4695   PetscFunctionReturn(PETSC_SUCCESS);
4696 }
4697 
4698 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4699 {
4700   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4701 
4702   PetscFunctionBegin;
4703   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4704   PetscCall(PetscFree(merge->id_r));
4705   PetscCall(PetscFree(merge->len_s));
4706   PetscCall(PetscFree(merge->len_r));
4707   PetscCall(PetscFree(merge->bi));
4708   PetscCall(PetscFree(merge->bj));
4709   PetscCall(PetscFree(merge->buf_ri[0]));
4710   PetscCall(PetscFree(merge->buf_ri));
4711   PetscCall(PetscFree(merge->buf_rj[0]));
4712   PetscCall(PetscFree(merge->buf_rj));
4713   PetscCall(PetscFree(merge->coi));
4714   PetscCall(PetscFree(merge->coj));
4715   PetscCall(PetscFree(merge->owners_co));
4716   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4717   PetscCall(PetscFree(merge));
4718   PetscFunctionReturn(PETSC_SUCCESS);
4719 }
4720 
4721 #include <../src/mat/utils/freespace.h>
4722 #include <petscbt.h>
4723 
4724 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4725 {
4726   MPI_Comm             comm;
4727   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4728   PetscMPIInt          size, rank, taga, *len_s;
4729   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4730   PetscMPIInt          proc, k;
4731   PetscInt           **buf_ri, **buf_rj;
4732   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4733   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4734   MPI_Request         *s_waits, *r_waits;
4735   MPI_Status          *status;
4736   const MatScalar     *aa, *a_a;
4737   MatScalar          **abuf_r, *ba_i;
4738   Mat_Merge_SeqsToMPI *merge;
4739   PetscContainer       container;
4740 
4741   PetscFunctionBegin;
4742   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4743   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4744 
4745   PetscCallMPI(MPI_Comm_size(comm, &size));
4746   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4747 
4748   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4749   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4750   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4751   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4752   aa = a_a;
4753 
4754   bi     = merge->bi;
4755   bj     = merge->bj;
4756   buf_ri = merge->buf_ri;
4757   buf_rj = merge->buf_rj;
4758 
4759   PetscCall(PetscMalloc1(size, &status));
4760   owners = merge->rowmap->range;
4761   len_s  = merge->len_s;
4762 
4763   /* send and recv matrix values */
4764   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4765   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4766 
4767   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4768   for (proc = 0, k = 0; proc < size; proc++) {
4769     if (!len_s[proc]) continue;
4770     i = owners[proc];
4771     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4772     k++;
4773   }
4774 
4775   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4776   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4777   PetscCall(PetscFree(status));
4778 
4779   PetscCall(PetscFree(s_waits));
4780   PetscCall(PetscFree(r_waits));
4781 
4782   /* insert mat values of mpimat */
4783   PetscCall(PetscMalloc1(N, &ba_i));
4784   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4785 
4786   for (k = 0; k < merge->nrecv; k++) {
4787     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4788     nrows       = *buf_ri_k[k];
4789     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4790     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4791   }
4792 
4793   /* set values of ba */
4794   m = merge->rowmap->n;
4795   for (i = 0; i < m; i++) {
4796     arow = owners[rank] + i;
4797     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4798     bnzi = bi[i + 1] - bi[i];
4799     PetscCall(PetscArrayzero(ba_i, bnzi));
4800 
4801     /* add local non-zero vals of this proc's seqmat into ba */
4802     anzi   = ai[arow + 1] - ai[arow];
4803     aj     = a->j + ai[arow];
4804     aa     = a_a + ai[arow];
4805     nextaj = 0;
4806     for (j = 0; nextaj < anzi; j++) {
4807       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4808         ba_i[j] += aa[nextaj++];
4809       }
4810     }
4811 
4812     /* add received vals into ba */
4813     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4814       /* i-th row */
4815       if (i == *nextrow[k]) {
4816         anzi   = *(nextai[k] + 1) - *nextai[k];
4817         aj     = buf_rj[k] + *nextai[k];
4818         aa     = abuf_r[k] + *nextai[k];
4819         nextaj = 0;
4820         for (j = 0; nextaj < anzi; j++) {
4821           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4822             ba_i[j] += aa[nextaj++];
4823           }
4824         }
4825         nextrow[k]++;
4826         nextai[k]++;
4827       }
4828     }
4829     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4830   }
4831   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4832   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4833   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4834 
4835   PetscCall(PetscFree(abuf_r[0]));
4836   PetscCall(PetscFree(abuf_r));
4837   PetscCall(PetscFree(ba_i));
4838   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4839   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4840   PetscFunctionReturn(PETSC_SUCCESS);
4841 }
4842 
4843 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4844 {
4845   Mat                  B_mpi;
4846   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4847   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4848   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4849   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4850   PetscInt             len, *dnz, *onz, bs, cbs;
4851   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4852   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4853   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4854   MPI_Status          *status;
4855   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4856   PetscBT              lnkbt;
4857   Mat_Merge_SeqsToMPI *merge;
4858   PetscContainer       container;
4859 
4860   PetscFunctionBegin;
4861   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4862 
4863   /* make sure it is a PETSc comm */
4864   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4865   PetscCallMPI(MPI_Comm_size(comm, &size));
4866   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4867 
4868   PetscCall(PetscNew(&merge));
4869   PetscCall(PetscMalloc1(size, &status));
4870 
4871   /* determine row ownership */
4872   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4873   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4874   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4875   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4876   PetscCall(PetscLayoutSetUp(merge->rowmap));
4877   PetscCall(PetscMalloc1(size, &len_si));
4878   PetscCall(PetscMalloc1(size, &merge->len_s));
4879 
4880   m      = merge->rowmap->n;
4881   owners = merge->rowmap->range;
4882 
4883   /* determine the number of messages to send, their lengths */
4884   len_s = merge->len_s;
4885 
4886   len          = 0; /* length of buf_si[] */
4887   merge->nsend = 0;
4888   for (PetscMPIInt proc = 0; proc < size; proc++) {
4889     len_si[proc] = 0;
4890     if (proc == rank) {
4891       len_s[proc] = 0;
4892     } else {
4893       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4894       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4895     }
4896     if (len_s[proc]) {
4897       merge->nsend++;
4898       nrows = 0;
4899       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4900         if (ai[i + 1] > ai[i]) nrows++;
4901       }
4902       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4903       len += len_si[proc];
4904     }
4905   }
4906 
4907   /* determine the number and length of messages to receive for ij-structure */
4908   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4909   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4910 
4911   /* post the Irecv of j-structure */
4912   PetscCall(PetscCommGetNewTag(comm, &tagj));
4913   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4914 
4915   /* post the Isend of j-structure */
4916   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4917 
4918   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4919     if (!len_s[proc]) continue;
4920     i = owners[proc];
4921     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4922     k++;
4923   }
4924 
4925   /* receives and sends of j-structure are complete */
4926   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4927   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4928 
4929   /* send and recv i-structure */
4930   PetscCall(PetscCommGetNewTag(comm, &tagi));
4931   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4932 
4933   PetscCall(PetscMalloc1(len + 1, &buf_s));
4934   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4935   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4936     if (!len_s[proc]) continue;
4937     /* form outgoing message for i-structure:
4938          buf_si[0]:                 nrows to be sent
4939                [1:nrows]:           row index (global)
4940                [nrows+1:2*nrows+1]: i-structure index
4941     */
4942     nrows       = len_si[proc] / 2 - 1;
4943     buf_si_i    = buf_si + nrows + 1;
4944     buf_si[0]   = nrows;
4945     buf_si_i[0] = 0;
4946     nrows       = 0;
4947     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4948       anzi = ai[i + 1] - ai[i];
4949       if (anzi) {
4950         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4951         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4952         nrows++;
4953       }
4954     }
4955     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4956     k++;
4957     buf_si += len_si[proc];
4958   }
4959 
4960   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4961   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4962 
4963   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4964   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4965 
4966   PetscCall(PetscFree(len_si));
4967   PetscCall(PetscFree(len_ri));
4968   PetscCall(PetscFree(rj_waits));
4969   PetscCall(PetscFree2(si_waits, sj_waits));
4970   PetscCall(PetscFree(ri_waits));
4971   PetscCall(PetscFree(buf_s));
4972   PetscCall(PetscFree(status));
4973 
4974   /* compute a local seq matrix in each processor */
4975   /* allocate bi array and free space for accumulating nonzero column info */
4976   PetscCall(PetscMalloc1(m + 1, &bi));
4977   bi[0] = 0;
4978 
4979   /* create and initialize a linked list */
4980   nlnk = N + 1;
4981   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4982 
4983   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4984   len = ai[owners[rank + 1]] - ai[owners[rank]];
4985   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4986 
4987   current_space = free_space;
4988 
4989   /* determine symbolic info for each local row */
4990   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4991 
4992   for (k = 0; k < merge->nrecv; k++) {
4993     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4994     nrows       = *buf_ri_k[k];
4995     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4996     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4997   }
4998 
4999   MatPreallocateBegin(comm, m, n, dnz, onz);
5000   len = 0;
5001   for (i = 0; i < m; i++) {
5002     bnzi = 0;
5003     /* add local non-zero cols of this proc's seqmat into lnk */
5004     arow = owners[rank] + i;
5005     anzi = ai[arow + 1] - ai[arow];
5006     aj   = a->j + ai[arow];
5007     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5008     bnzi += nlnk;
5009     /* add received col data into lnk */
5010     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5011       if (i == *nextrow[k]) {            /* i-th row */
5012         anzi = *(nextai[k] + 1) - *nextai[k];
5013         aj   = buf_rj[k] + *nextai[k];
5014         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5015         bnzi += nlnk;
5016         nextrow[k]++;
5017         nextai[k]++;
5018       }
5019     }
5020     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5021 
5022     /* if free space is not available, make more free space */
5023     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5024     /* copy data into free space, then initialize lnk */
5025     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5026     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5027 
5028     current_space->array += bnzi;
5029     current_space->local_used += bnzi;
5030     current_space->local_remaining -= bnzi;
5031 
5032     bi[i + 1] = bi[i] + bnzi;
5033   }
5034 
5035   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5036 
5037   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5038   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5039   PetscCall(PetscLLDestroy(lnk, lnkbt));
5040 
5041   /* create symbolic parallel matrix B_mpi */
5042   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5043   PetscCall(MatCreate(comm, &B_mpi));
5044   if (n == PETSC_DECIDE) {
5045     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5046   } else {
5047     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5048   }
5049   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5050   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5051   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5052   MatPreallocateEnd(dnz, onz);
5053   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5054 
5055   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5056   B_mpi->assembled = PETSC_FALSE;
5057   merge->bi        = bi;
5058   merge->bj        = bj;
5059   merge->buf_ri    = buf_ri;
5060   merge->buf_rj    = buf_rj;
5061   merge->coi       = NULL;
5062   merge->coj       = NULL;
5063   merge->owners_co = NULL;
5064 
5065   PetscCall(PetscCommDestroy(&comm));
5066 
5067   /* attach the supporting struct to B_mpi for reuse */
5068   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5069   PetscCall(PetscContainerSetPointer(container, merge));
5070   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5071   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5072   PetscCall(PetscContainerDestroy(&container));
5073   *mpimat = B_mpi;
5074 
5075   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5076   PetscFunctionReturn(PETSC_SUCCESS);
5077 }
5078 
5079 /*@
5080   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5081   matrices from each processor
5082 
5083   Collective
5084 
5085   Input Parameters:
5086 + comm   - the communicator the parallel matrix will live on
5087 . seqmat - the input sequential matrix (one per MPI process)
5088 . m      - number of local rows (or `PETSC_DECIDE`)
5089 . n      - number of local columns (or `PETSC_DECIDE`)
5090 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5091 
5092   Output Parameter:
5093 . mpimat - the parallel matrix generated
5094 
5095   Level: advanced
5096 
5097   Note:
5098   The dimensions of the sequential matrix MUST be the same on every process.
5099   The input `seqmat` is stored inside the container "Mat_Merge_SeqsToMPI" and will be
5100   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5101 
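  Example Usage:
  A minimal sketch, assuming each MPI process already holds a `MATSEQAIJ` matrix `seqmat` of identical global size (the names are illustrative):
.vb
  Mat mpimat;

  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat));
  // ... use mpimat; seqmat is kept inside mpimat's container (see the Note above) ...
  PetscCall(MatDestroy(&mpimat));
.ve
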
5102 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5103 @*/
5104 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5105 {
5106   PetscMPIInt size;
5107 
5108   PetscFunctionBegin;
5109   PetscCallMPI(MPI_Comm_size(comm, &size));
5110   if (size == 1) {
5111     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5112     if (scall == MAT_INITIAL_MATRIX) {
5113       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5114     } else {
5115       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5116     }
5117     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5118     PetscFunctionReturn(PETSC_SUCCESS);
5119   }
5120   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5121   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5122   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5123   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5124   PetscFunctionReturn(PETSC_SUCCESS);
5125 }
5126 
5127 /*@
5128   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5129 
5130   Not Collective
5131 
5132   Input Parameter:
5133 . A - the matrix
5134 
5135   Output Parameter:
5136 . A_loc - the local sequential matrix generated
5137 
5138   Level: developer
5139 
5140   Notes:
5141   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5142   with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with `MatGetLocalSize()` and
5143   `n` is the global column count obtained with `MatGetSize()`.
5144 
5145   In other words, it combines the diagonal and off-diagonal parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5146 
5147   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5148 
5149   Destroy the matrix with `MatDestroy()`
5150 
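  Example Usage:
  A minimal sketch that works for both sequential and parallel `A` (the names are illustrative):
.vb
  Mat A_loc;

  PetscCall(MatAIJGetLocalMat(A, &A_loc));
  // ... operate on the local rows of A through A_loc ...
  PetscCall(MatDestroy(&A_loc));
.ve
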
5151 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5152 @*/
5153 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5154 {
5155   PetscBool mpi;
5156 
5157   PetscFunctionBegin;
5158   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5159   if (mpi) {
5160     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5161   } else {
5162     *A_loc = A;
5163     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5164   }
5165   PetscFunctionReturn(PETSC_SUCCESS);
5166 }
5167 
5168 /*@
5169   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5170 
5171   Not Collective
5172 
5173   Input Parameters:
5174 + A     - the matrix
5175 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5176 
5177   Output Parameter:
5178 . A_loc - the local sequential matrix generated
5179 
5180   Level: developer
5181 
5182   Notes:
5183   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5184   matrix with `mlocal` rows and `n` columns. `mlocal` is the local row count obtained with
5185   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5186 
5187   In other words, it combines the diagonal and off-diagonal parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5188 
5189   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5190   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5191   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5192   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5193 
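  Example Usage:
  A sketch of the initial/reuse pattern for the case where the values of `A` change repeatedly while its nonzero structure stays fixed (the names and the loop are illustrative):
.vb
  Mat      A_loc;
  PetscInt it, nsteps = 10; // illustrative iteration count

  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); // first call creates A_loc
  for (it = 0; it < nsteps; it++) {
    // ... change the values (but not the nonzero structure) of A ...
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc)); // subsequent calls only refresh the values
    // ... use A_loc ...
  }
  PetscCall(MatDestroy(&A_loc));
.ve
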
5194 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5195 @*/
5196 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5197 {
5198   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5199   Mat_SeqAIJ        *mat, *a, *b;
5200   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5201   const PetscScalar *aa, *ba, *aav, *bav;
5202   PetscScalar       *ca, *cam;
5203   PetscMPIInt        size;
5204   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5205   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5206   PetscBool          match;
5207 
5208   PetscFunctionBegin;
5209   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5210   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5211   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5212   if (size == 1) {
5213     if (scall == MAT_INITIAL_MATRIX) {
5214       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5215       *A_loc = mpimat->A;
5216     } else if (scall == MAT_REUSE_MATRIX) {
5217       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5218     }
5219     PetscFunctionReturn(PETSC_SUCCESS);
5220   }
5221 
5222   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5223   a  = (Mat_SeqAIJ *)mpimat->A->data;
5224   b  = (Mat_SeqAIJ *)mpimat->B->data;
5225   ai = a->i;
5226   aj = a->j;
5227   bi = b->i;
5228   bj = b->j;
5229   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5230   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5231   aa = aav;
5232   ba = bav;
5233   if (scall == MAT_INITIAL_MATRIX) {
5234     PetscCall(PetscMalloc1(1 + am, &ci));
5235     ci[0] = 0;
5236     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5237     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5238     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5239     k = 0;
5240     for (i = 0; i < am; i++) {
5241       ncols_o = bi[i + 1] - bi[i];
5242       ncols_d = ai[i + 1] - ai[i];
5243       /* off-diagonal portion of A */
5244       for (jo = 0; jo < ncols_o; jo++) {
5245         col = cmap[*bj];
5246         if (col >= cstart) break;
5247         cj[k] = col;
5248         bj++;
5249         ca[k++] = *ba++;
5250       }
5251       /* diagonal portion of A */
5252       for (j = 0; j < ncols_d; j++) {
5253         cj[k]   = cstart + *aj++;
5254         ca[k++] = *aa++;
5255       }
5256       /* off-diagonal portion of A */
5257       for (j = jo; j < ncols_o; j++) {
5258         cj[k]   = cmap[*bj++];
5259         ca[k++] = *ba++;
5260       }
5261     }
5262     /* put together the new matrix */
5263     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5264     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5265     /* Since these are PETSc arrays, change flags to free them as necessary. */
5266     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5267     mat->free_a  = PETSC_TRUE;
5268     mat->free_ij = PETSC_TRUE;
5269     mat->nonew   = 0;
5270   } else if (scall == MAT_REUSE_MATRIX) {
5271     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5272     ci  = mat->i;
5273     cj  = mat->j;
5274     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5275     for (i = 0; i < am; i++) {
5276       /* off-diagonal portion of A */
5277       ncols_o = bi[i + 1] - bi[i];
5278       for (jo = 0; jo < ncols_o; jo++) {
5279         col = cmap[*bj];
5280         if (col >= cstart) break;
5281         *cam++ = *ba++;
5282         bj++;
5283       }
5284       /* diagonal portion of A */
5285       ncols_d = ai[i + 1] - ai[i];
5286       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5287       /* off-diagonal portion of A */
5288       for (j = jo; j < ncols_o; j++) {
5289         *cam++ = *ba++;
5290         bj++;
5291       }
5292     }
5293     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5294   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5295   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5296   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5297   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5298   PetscFunctionReturn(PETSC_SUCCESS);
5299 }
5300 
5301 /*@
5302   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5303   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5304 
5305   Not Collective
5306 
5307   Input Parameters:
5308 + A     - the matrix
5309 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5310 
5311   Output Parameters:
5312 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5313 - A_loc - the local sequential matrix generated
5314 
5315   Level: developer
5316 
5317   Note:
5318   This is different from `MatMPIAIJGetLocalMat()` since the first columns of the returned matrix are those associated with the diagonal
5319   part, followed by those associated with the off-diagonal part (in its local ordering)
5320 
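  Example Usage:
  A sketch that also retrieves the global column indices of the merged local matrix (the names are illustrative):
.vb
  Mat A_loc;
  IS  glob;

  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
  // local column c of A_loc corresponds to the global column given by the c-th entry of glob;
  // the leading columns come from the diagonal block, the trailing ones from the off-diagonal block
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&A_loc));
.ve
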
5321 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5322 @*/
5323 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5324 {
5325   Mat             Ao, Ad;
5326   const PetscInt *cmap;
5327   PetscMPIInt     size;
5328   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5329 
5330   PetscFunctionBegin;
5331   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5332   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5333   if (size == 1) {
5334     if (scall == MAT_INITIAL_MATRIX) {
5335       PetscCall(PetscObjectReference((PetscObject)Ad));
5336       *A_loc = Ad;
5337     } else if (scall == MAT_REUSE_MATRIX) {
5338       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5339     }
5340     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5341     PetscFunctionReturn(PETSC_SUCCESS);
5342   }
5343   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5344   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5345   if (f) {
5346     PetscCall((*f)(A, scall, glob, A_loc));
5347   } else {
5348     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5349     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5350     Mat_SeqAIJ        *c;
5351     PetscInt          *ai = a->i, *aj = a->j;
5352     PetscInt          *bi = b->i, *bj = b->j;
5353     PetscInt          *ci, *cj;
5354     const PetscScalar *aa, *ba;
5355     PetscScalar       *ca;
5356     PetscInt           i, j, am, dn, on;
5357 
5358     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5359     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5360     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5361     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5362     if (scall == MAT_INITIAL_MATRIX) {
5363       PetscInt k;
5364       PetscCall(PetscMalloc1(1 + am, &ci));
5365       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5366       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5367       ci[0] = 0;
5368       for (i = 0, k = 0; i < am; i++) {
5369         const PetscInt ncols_o = bi[i + 1] - bi[i];
5370         const PetscInt ncols_d = ai[i + 1] - ai[i];
5371         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5372         /* diagonal portion of A */
5373         for (j = 0; j < ncols_d; j++, k++) {
5374           cj[k] = *aj++;
5375           ca[k] = *aa++;
5376         }
5377         /* off-diagonal portion of A */
5378         for (j = 0; j < ncols_o; j++, k++) {
5379           cj[k] = dn + *bj++;
5380           ca[k] = *ba++;
5381         }
5382       }
5383       /* put together the new matrix */
5384       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5385       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5386       /* Since these are PETSc arrays, change flags to free them as necessary. */
5387       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5388       c->free_a  = PETSC_TRUE;
5389       c->free_ij = PETSC_TRUE;
5390       c->nonew   = 0;
5391       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5392     } else if (scall == MAT_REUSE_MATRIX) {
5393       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5394       for (i = 0; i < am; i++) {
5395         const PetscInt ncols_d = ai[i + 1] - ai[i];
5396         const PetscInt ncols_o = bi[i + 1] - bi[i];
5397         /* diagonal portion of A */
5398         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5399         /* off-diagonal portion of A */
5400         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5401       }
5402       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5403     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5404     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5405     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba)); /* ba was obtained from Ao above */
5406     if (glob) {
5407       PetscInt cst, *gidx;
5408 
5409       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5410       PetscCall(PetscMalloc1(dn + on, &gidx));
5411       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5412       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5413       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5414     }
5415   }
5416   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5417   PetscFunctionReturn(PETSC_SUCCESS);
5418 }
5419 
5420 /*@C
5421   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5422 
5423   Not Collective
5424 
5425   Input Parameters:
5426 + A     - the matrix
5427 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5428 . row   - index set of rows to extract (or `NULL`)
5429 - col   - index set of columns to extract (or `NULL`)
5430 
5431   Output Parameter:
5432 . A_loc - the local sequential matrix generated
5433 
5434   Level: developer
5435 
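  Example Usage:
  A sketch extracting all local rows and only the nonzero columns, by passing `NULL` for both index sets (the names are illustrative):
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
  // the global indices of the retained columns are attached to A_loc as the IS "_petsc_GetLocalMatCondensed_iscol"
  PetscCall(MatDestroy(&A_loc));
.ve
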
5436 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5437 @*/
5438 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5439 {
5440   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5441   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5442   IS          isrowa, iscola;
5443   Mat        *aloc;
5444   PetscBool   match;
5445 
5446   PetscFunctionBegin;
5447   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5448   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5449   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5450   if (!row) {
5451     start = A->rmap->rstart;
5452     end   = A->rmap->rend;
5453     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5454   } else {
5455     isrowa = *row;
5456   }
5457   if (!col) {
5458     start = A->cmap->rstart;
5459     cmap  = a->garray;
5460     nzA   = a->A->cmap->n;
5461     nzB   = a->B->cmap->n;
5462     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5463     ncols = 0;
5464     for (i = 0; i < nzB; i++) {
5465       if (cmap[i] < start) idx[ncols++] = cmap[i];
5466       else break;
5467     }
5468     imark = i;
5469     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5470     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5471     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5472   } else {
5473     iscola = *col;
5474   }
5475   if (scall != MAT_INITIAL_MATRIX) {
5476     PetscCall(PetscMalloc1(1, &aloc));
5477     aloc[0] = *A_loc;
5478   }
5479   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5480   if (!col) { /* attach global id of condensed columns */
5481     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5482   }
5483   *A_loc = aloc[0];
5484   PetscCall(PetscFree(aloc));
5485   if (!row) PetscCall(ISDestroy(&isrowa));
5486   if (!col) PetscCall(ISDestroy(&iscola));
5487   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5488   PetscFunctionReturn(PETSC_SUCCESS);
5489 }
5490 
5491 /*
5492  * Create a sequential AIJ matrix based on row indices: once a row index is matched, the whole row is extracted.
5493  * Rows can be local or remote. The routine is designed to be memory scalable, so that nothing is allocated based
5494  * on a global size.
5495  * */
5496 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5497 {
5498   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5499   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5500   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5501   PetscMPIInt            owner;
5502   PetscSFNode           *iremote, *oiremote;
5503   const PetscInt        *lrowindices;
5504   PetscSF                sf, osf;
5505   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5506   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5507   MPI_Comm               comm;
5508   ISLocalToGlobalMapping mapping;
5509   const PetscScalar     *pd_a, *po_a;
5510 
5511   PetscFunctionBegin;
5512   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5513   /* plocalsize is the number of roots
5514    * nrows is the number of leaves
5515    * */
5516   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5517   PetscCall(ISGetLocalSize(rows, &nrows));
5518   PetscCall(PetscCalloc1(nrows, &iremote));
5519   PetscCall(ISGetIndices(rows, &lrowindices));
5520   for (i = 0; i < nrows; i++) {
5521     /* Find a remote index and an owner for a row
5522      * The row could be local or remote
5523      * */
5524     owner = 0;
5525     lidx  = 0;
5526     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5527     iremote[i].index = lidx;
5528     iremote[i].rank  = owner;
5529   }
5530   /* Create SF to communicate how many nonzero columns for each row */
5531   PetscCall(PetscSFCreate(comm, &sf));
5532   /* SF will figure out the number of nonzero columns for each row, and their
5533    * offsets
5534    * */
5535   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5536   PetscCall(PetscSFSetFromOptions(sf));
5537   PetscCall(PetscSFSetUp(sf));
5538 
5539   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5540   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5541   PetscCall(PetscCalloc1(nrows, &pnnz));
5542   roffsets[0] = 0;
5543   roffsets[1] = 0;
5544   for (i = 0; i < plocalsize; i++) {
5545     /* diagonal */
5546     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5547     /* off-diagonal */
5548     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5549     /* compute offsets so that we know the relative location of each row's data */
5550     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5551     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5552   }
5553   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5554   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5555   /* 'r' means root, and 'l' means leaf */
5556   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5557   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5558   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5559   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5560   PetscCall(PetscSFDestroy(&sf));
5561   PetscCall(PetscFree(roffsets));
5562   PetscCall(PetscFree(nrcols));
5563   dntotalcols = 0;
5564   ontotalcols = 0;
5565   ncol        = 0;
5566   for (i = 0; i < nrows; i++) {
5567     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5568     ncol    = PetscMax(pnnz[i], ncol);
5569     /* diagonal */
5570     dntotalcols += nlcols[i * 2 + 0];
5571     /* off-diagonal */
5572     ontotalcols += nlcols[i * 2 + 1];
5573   }
5574   /* We do not need to figure out the exact number of columns,
5575    * since all the calculations are done by going through the raw data
5576    * */
5577   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5578   PetscCall(MatSetUp(*P_oth));
5579   PetscCall(PetscFree(pnnz));
5580   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5581   /* diagonal */
5582   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5583   /* off-diagonal */
5584   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5585   /* diagonal */
5586   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5587   /* off-diagonal */
5588   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5589   dntotalcols = 0;
5590   ontotalcols = 0;
5591   ntotalcols  = 0;
5592   for (i = 0; i < nrows; i++) {
5593     owner = 0;
5594     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5595     /* Set iremote for diag matrix */
5596     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5597       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5598       iremote[dntotalcols].rank  = owner;
5599       /* P_oth is SeqAIJ, so ilocal needs to point into its single contiguous value array */
5600       ilocal[dntotalcols++] = ntotalcols++;
5601     }
5602     /* off-diagonal */
5603     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5604       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5605       oiremote[ontotalcols].rank  = owner;
5606       oilocal[ontotalcols++]      = ntotalcols++;
5607     }
5608   }
5609   PetscCall(ISRestoreIndices(rows, &lrowindices));
5610   PetscCall(PetscFree(loffsets));
5611   PetscCall(PetscFree(nlcols));
5612   PetscCall(PetscSFCreate(comm, &sf));
5613   /* P serves as roots and P_oth is leaves
5614    * Diag matrix
5615    * */
5616   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5617   PetscCall(PetscSFSetFromOptions(sf));
5618   PetscCall(PetscSFSetUp(sf));
5619 
5620   PetscCall(PetscSFCreate(comm, &osf));
5621   /* off-diagonal */
5622   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5623   PetscCall(PetscSFSetFromOptions(osf));
5624   PetscCall(PetscSFSetUp(osf));
5625   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5626   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5627   /* operate on the matrix internal data to save memory */
5628   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5629   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5630   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5631   /* Convert to global indices for diag matrix */
5632   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5633   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5634   /* We want P_oth to store global indices */
5635   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5636   /* Use memory scalable approach */
5637   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5638   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5639   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5640   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5641   /* Convert back to local indices */
5642   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5643   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5644   nout = 0;
5645   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5646   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5647   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5648   /* Exchange values */
5649   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5650   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5651   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5652   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5653   /* Stop PETSc from shrinking memory */
5654   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5655   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5656   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5657   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5658   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5659   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5660   PetscCall(PetscSFDestroy(&sf));
5661   PetscCall(PetscSFDestroy(&osf));
5662   PetscFunctionReturn(PETSC_SUCCESS);
5663 }
5664 
5665 /*
5666  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of the local A
5667  * This supports MPIAIJ and MAIJ
5668  * */
5669 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5670 {
5671   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5672   Mat_SeqAIJ *p_oth;
5673   IS          rows, map;
5674   PetscHMapI  hamp;
5675   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5676   MPI_Comm    comm;
5677   PetscSF     sf, osf;
5678   PetscBool   has;
5679 
5680   PetscFunctionBegin;
5681   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5682   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5683   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5684    *  and then create a submatrix (that often is an overlapping matrix)
5685    * */
5686   if (reuse == MAT_INITIAL_MATRIX) {
5687     /* Use a hash table to figure out unique keys */
5688     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5689     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5690     count = 0;
5691     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5692     for (i = 0; i < a->B->cmap->n; i++) {
5693       key = a->garray[i] / dof;
5694       PetscCall(PetscHMapIHas(hamp, key, &has));
5695       if (!has) {
5696         mapping[i] = count;
5697         PetscCall(PetscHMapISet(hamp, key, count++));
5698       } else {
5699         /* Current 'i' maps to the same key as the previous step */
5700         mapping[i] = count - 1;
5701       }
5702     }
5703     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5704     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5705     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5706     PetscCall(PetscCalloc1(htsize, &rowindices));
5707     off = 0;
5708     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5709     PetscCall(PetscHMapIDestroy(&hamp));
5710     PetscCall(PetscSortInt(htsize, rowindices));
5711     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5712     /* In case the matrix was already created but the user wants to recreate it */
5713     PetscCall(MatDestroy(P_oth));
5714     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5715     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5716     PetscCall(ISDestroy(&map));
5717     PetscCall(ISDestroy(&rows));
5718   } else if (reuse == MAT_REUSE_MATRIX) {
5719     /* If the matrix was already created, we simply update its values using the SF objects
5720      * that were attached to the matrix earlier.
5721      */
5722     const PetscScalar *pd_a, *po_a;
5723 
5724     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5725     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5726     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5727     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5728     /* Update values in place */
5729     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5730     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5731     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5732     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5733     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5734     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5735     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5736     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5737   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5738   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5739   PetscFunctionReturn(PETSC_SUCCESS);
5740 }
5741 
5742 /*@C
5743   MatGetBrowsOfAcols - Extracts the rows of `B` that correspond to the nonzero columns of the local `A`, returning them as a sequential matrix along with the index sets used
5744 
5745   Collective
5746 
5747   Input Parameters:
5748 + A     - the first matrix in `MATMPIAIJ` format
5749 . B     - the second matrix in `MATMPIAIJ` format
5750 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5751 
5752   Output Parameters:
5753 + rowb  - On input, the index set of rows of `B` to extract (or `NULL`); modified on output
5754 . colb  - On input, the index set of columns of `B` to extract (or `NULL`); modified on output
5755 - B_seq - the sequential matrix generated
5756 
5757   Level: developer
5758 
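  Example Usage:
  A sketch of the initial/reuse calling pattern (the names are illustrative):
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq;

  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq)); // creates rowb, colb and B_seq
  // ... change the values (but not the nonzero structure) of A and B ...
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));   // reuses rowb, colb and B_seq
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve
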
5759 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5760 @*/
5761 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5762 {
5763   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5764   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5765   IS          isrowb, iscolb;
5766   Mat        *bseq = NULL;
5767 
5768   PetscFunctionBegin;
5769   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5770              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5771   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5772 
5773   if (scall == MAT_INITIAL_MATRIX) {
5774     start = A->cmap->rstart;
5775     cmap  = a->garray;
5776     nzA   = a->A->cmap->n;
5777     nzB   = a->B->cmap->n;
5778     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5779     ncols = 0;
5780     for (i = 0; i < nzB; i++) { /* row < local row index */
5781       if (cmap[i] < start) idx[ncols++] = cmap[i];
5782       else break;
5783     }
5784     imark = i;
5785     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5786     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5787     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5788     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5789   } else {
5790     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5791     isrowb = *rowb;
5792     iscolb = *colb;
5793     PetscCall(PetscMalloc1(1, &bseq));
5794     bseq[0] = *B_seq;
5795   }
5796   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5797   *B_seq = bseq[0];
5798   PetscCall(PetscFree(bseq));
5799   if (!rowb) {
5800     PetscCall(ISDestroy(&isrowb));
5801   } else {
5802     *rowb = isrowb;
5803   }
5804   if (!colb) {
5805     PetscCall(ISDestroy(&iscolb));
5806   } else {
5807     *colb = iscolb;
5808   }
5809   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5810   PetscFunctionReturn(PETSC_SUCCESS);
5811 }
5812 
5813 /*
5814     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5815     of the OFF-DIAGONAL portion of local A
5816 
5817     Collective
5818 
5819    Input Parameters:
5820 +    A,B - the matrices in `MATMPIAIJ` format
5821 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5822 
5823    Output Parameters:
5824 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5825 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5826 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5827 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5828 
5829     Developer Note:
5830     This directly accesses information inside the VecScatter associated with the matrix-vector product
5831      for this matrix. This is not desirable.
5832 
5833     Level: developer
5834 
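    Illustrative calling pattern (a sketch; the variable names are hypothetical):

      PetscInt  *startsj_s = NULL, *startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat        B_oth;

      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
      // ... later, after the values (but not the structure) of B have changed ...
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
      PetscCall(PetscFree2(startsj_s, startsj_r));
      PetscCall(PetscFree(bufa));
      PetscCall(MatDestroy(&B_oth));
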
5835 */
5836 
5837 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5838 {
5839   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5840   VecScatter         ctx;
5841   MPI_Comm           comm;
5842   const PetscMPIInt *rprocs, *sprocs;
5843   PetscMPIInt        nrecvs, nsends;
5844   const PetscInt    *srow, *rstarts, *sstarts;
5845   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5846   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5847   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5848   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5849   PetscMPIInt        size, tag, rank, nreqs;
5850 
5851   PetscFunctionBegin;
5852   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5853   PetscCallMPI(MPI_Comm_size(comm, &size));
5854 
5855   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5856              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5857   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5858   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5859 
5860   if (size == 1) {
5861     startsj_s = NULL;
5862     bufa_ptr  = NULL;
5863     *B_oth    = NULL;
5864     PetscFunctionReturn(PETSC_SUCCESS);
5865   }
5866 
5867   ctx = a->Mvctx;
5868   tag = ((PetscObject)ctx)->tag;
5869 
5870   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5871   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5872   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5873   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5874   PetscCall(PetscMalloc1(nreqs, &reqs));
5875   rwaits = reqs;
5876   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5877 
5878   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5879   if (scall == MAT_INITIAL_MATRIX) {
5880     /* i-array */
5881     /*  post receives */
5882     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5883     for (i = 0; i < nrecvs; i++) {
5884       rowlen = rvalues + rstarts[i] * rbs;
5885       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5886       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5887     }
5888 
5889     /* pack the outgoing message */
5890     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5891 
5892     sstartsj[0] = 0;
5893     rstartsj[0] = 0;
5894     len         = 0; /* total length of j or a array to be sent */
5895     if (nsends) {
5896       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5897       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5898     }
5899     for (i = 0; i < nsends; i++) {
5900       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5901       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5902       for (j = 0; j < nrows; j++) {
5903         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5904         for (l = 0; l < sbs; l++) {
5905           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5906 
5907           rowlen[j * sbs + l] = ncols;
5908 
5909           len += ncols;
5910           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5911         }
5912         k++;
5913       }
5914       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5915 
5916       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5917     }
5918     /* recvs and sends of i-array are completed */
5919     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5920     PetscCall(PetscFree(svalues));
5921 
5922     /* allocate buffers for sending j and a arrays */
5923     PetscCall(PetscMalloc1(len + 1, &bufj));
5924     PetscCall(PetscMalloc1(len + 1, &bufa));
5925 
5926     /* create i-array of B_oth */
5927     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5928 
5929     b_othi[0] = 0;
5930     len       = 0; /* total length of j or a array to be received */
5931     k         = 0;
5932     for (i = 0; i < nrecvs; i++) {
5933       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5934       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5935       for (j = 0; j < nrows; j++) {
5936         b_othi[k + 1] = b_othi[k] + rowlen[j];
5937         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5938         k++;
5939       }
5940       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5941     }
5942     PetscCall(PetscFree(rvalues));
5943 
5944     /* allocate space for j and a arrays of B_oth */
5945     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5946     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5947 
5948     /* j-array */
5949     /*  post receives of j-array */
5950     for (i = 0; i < nrecvs; i++) {
5951       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5952       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5953     }
5954 
5955     /* pack the outgoing message j-array */
5956     if (nsends) k = sstarts[0];
5957     for (i = 0; i < nsends; i++) {
5958       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5959       bufJ  = bufj + sstartsj[i];
5960       for (j = 0; j < nrows; j++) {
5961         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5962         for (ll = 0; ll < sbs; ll++) {
5963           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5964           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5965           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5966         }
5967       }
5968       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5969     }
5970 
5971     /* recvs and sends of j-array are completed */
5972     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5973   } else if (scall == MAT_REUSE_MATRIX) {
5974     sstartsj = *startsj_s;
5975     rstartsj = *startsj_r;
5976     bufa     = *bufa_ptr;
5977     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5978   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5979 
5980   /* a-array */
5981   /*  post receives of a-array */
5982   for (i = 0; i < nrecvs; i++) {
5983     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5984     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5985   }
5986 
5987   /* pack the outgoing message a-array */
5988   if (nsends) k = sstarts[0];
5989   for (i = 0; i < nsends; i++) {
5990     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5991     bufA  = bufa + sstartsj[i];
5992     for (j = 0; j < nrows; j++) {
5993       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5994       for (ll = 0; ll < sbs; ll++) {
5995         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5996         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5997         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5998       }
5999     }
6000     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6001   }
6002   /* recvs and sends of a-array are completed */
6003   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6004   PetscCall(PetscFree(reqs));
6005 
6006   if (scall == MAT_INITIAL_MATRIX) {
6007     Mat_SeqAIJ *b_oth;
6008 
6009     /* put together the new matrix */
6010     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6011 
6012     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6013     /* Since these are PETSc arrays, change flags to free them as necessary. */
6014     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6015     b_oth->free_a  = PETSC_TRUE;
6016     b_oth->free_ij = PETSC_TRUE;
6017     b_oth->nonew   = 0;
6018 
6019     PetscCall(PetscFree(bufj));
6020     if (!startsj_s || !bufa_ptr) {
6021       PetscCall(PetscFree2(sstartsj, rstartsj));
6022       PetscCall(PetscFree(bufa_ptr));
6023     } else {
6024       *startsj_s = sstartsj;
6025       *startsj_r = rstartsj;
6026       *bufa_ptr  = bufa;
6027     }
6028   } else if (scall == MAT_REUSE_MATRIX) {
6029     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6030   }
6031 
6032   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6033   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6034   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6035   PetscFunctionReturn(PETSC_SUCCESS);
6036 }
6037 
6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6041 #if defined(PETSC_HAVE_MKL_SPARSE)
6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6043 #endif
6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6046 #if defined(PETSC_HAVE_ELEMENTAL)
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 #if defined(PETSC_HAVE_SCALAPACK)
6050 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6051 #endif
6052 #if defined(PETSC_HAVE_HYPRE)
6053 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6054 #endif
6055 #if defined(PETSC_HAVE_CUDA)
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6057 #endif
6058 #if defined(PETSC_HAVE_HIP)
6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6060 #endif
6061 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6063 #endif
6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6065 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6066 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6067 
6068 /*
6069     Computes (B'*A')' since computing B*A directly is untenable
6070 
6071                n                       p                          p
6072         [             ]       [             ]         [                 ]
6073       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6074         [             ]       [             ]         [                 ]
6075 
6076 */
6077 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6078 {
6079   Mat At, Bt, Ct;
6080 
6081   PetscFunctionBegin;
6082   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6083   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6084   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6085   PetscCall(MatDestroy(&At));
6086   PetscCall(MatDestroy(&Bt));
6087   PetscCall(MatTransposeSetPrecursor(Ct, C));
6088   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6089   PetscCall(MatDestroy(&Ct));
6090   PetscFunctionReturn(PETSC_SUCCESS);
6091 }
6092 
6093 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6094 {
6095   PetscBool cisdense;
6096 
6097   PetscFunctionBegin;
6098   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6099   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6100   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6101   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6102   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6103   PetscCall(MatSetUp(C));
6104 
6105   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6106   PetscFunctionReturn(PETSC_SUCCESS);
6107 }
6108 
6109 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6110 {
6111   Mat_Product *product = C->product;
6112   Mat          A = product->A, B = product->B;
6113 
6114   PetscFunctionBegin;
6115   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6116              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6117   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6118   C->ops->productsymbolic = MatProductSymbolic_AB;
6119   PetscFunctionReturn(PETSC_SUCCESS);
6120 }
6121 
6122 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6123 {
6124   Mat_Product *product = C->product;
6125 
6126   PetscFunctionBegin;
6127   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6128   PetscFunctionReturn(PETSC_SUCCESS);
6129 }
6130 
6131 /*
6132    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6133 
6134   Input Parameters:
6135 
6136     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6137     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6138 
6139     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6140 
6141     For Set1, j1[] contains column indices of the nonzeros.
6142     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6143     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6144     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6145 
6146     Similar for Set2.
6147 
6148     This routine merges the two sets of nonzeros row by row and removes repeats.
6149 
6150   Output Parameters: (memory is allocated by the caller)
6151 
6152     i[],j[]: the CSR of the merged matrix, which has m rows.
6153     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6154     imap2[]: similar to imap1[], but for Set2.
6155     Note we order nonzeros row-by-row and from left to right.
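
    A tiny illustrative example (one local row, m = 1):
      Set1: j1 = {2,2,7}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3}  (unique columns 2 and 7; column 2 appears twice)
      Set2: j2 = {5,7},   rowBegin2 = {0}, rowEnd2 = {2}, jmap2 = {0,1,2}  (unique columns 5 and 7)
    The merged row has the unique columns {2,5,7}, so the routine produces
      i = {0,3}, j = {2,5,7}, imap1 = {0,2}, imap2 = {1,2}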
6156 */
6157 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6158 {
6159   PetscInt   r, m; /* Row index of mat */
6160   PetscCount t, t1, t2, b1, e1, b2, e2;
6161 
6162   PetscFunctionBegin;
6163   PetscCall(MatGetLocalSize(mat, &m, NULL));
6164   t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
6165   i[0]        = 0;
6166   for (r = 0; r < m; r++) { /* Do row by row merging */
6167     b1 = rowBegin1[r];
6168     e1 = rowEnd1[r];
6169     b2 = rowBegin2[r];
6170     e2 = rowEnd2[r];
6171     while (b1 < e1 && b2 < e2) {
6172       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6173         j[t]      = j1[b1];
6174         imap1[t1] = t;
6175         imap2[t2] = t;
6176         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6177         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6178         t1++;
6179         t2++;
6180         t++;
6181       } else if (j1[b1] < j2[b2]) {
6182         j[t]      = j1[b1];
6183         imap1[t1] = t;
6184         b1 += jmap1[t1 + 1] - jmap1[t1];
6185         t1++;
6186         t++;
6187       } else {
6188         j[t]      = j2[b2];
6189         imap2[t2] = t;
6190         b2 += jmap2[t2 + 1] - jmap2[t2];
6191         t2++;
6192         t++;
6193       }
6194     }
6195     /* Merge the remaining in either j1[] or j2[] */
6196     while (b1 < e1) {
6197       j[t]      = j1[b1];
6198       imap1[t1] = t;
6199       b1 += jmap1[t1 + 1] - jmap1[t1];
6200       t1++;
6201       t++;
6202     }
6203     while (b2 < e2) {
6204       j[t]      = j2[b2];
6205       imap2[t2] = t;
6206       b2 += jmap2[t2 + 1] - jmap2[t2];
6207       t2++;
6208       t++;
6209     }
6210     PetscCall(PetscIntCast(t, i + r + 1));
6211   }
6212   PetscFunctionReturn(PETSC_SUCCESS);
6213 }
6214 
6215 /*
6216   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6217 
6218   Input Parameters:
6219     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6220     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6221       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6222 
6223       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6224       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6225 
6226   Output Parameters:
6227     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6228     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6229       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6230       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6231 
6232     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6233       Atot: number of entries belonging to the diagonal block.
6234       Annz: number of unique nonzeros belonging to the diagonal block.
6235       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Its length is Atot, which counts
6236         repeats (i.e., the same 'i,j' pair may appear multiple times).
6237       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6238         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6242 
6243     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6244 
6245     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
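
    A tiny illustrative example (one local row with rstart = 0, m = 1, column ownership [cstart,cend) = [0,4), global column count 8):
      Input:  n = 4, i = {0,0,0,0}, j = {5,1,3,6}, perm = {0,1,2,3}
      Output: j reordered to {1,3,5,6} (diagonal-block columns first), rowBegin = {0}, rowMid = {2}, rowEnd = {4},
              Atot = 2, Annz = 2, Aperm = {1,2}, Ajmap = {0,1,2}  (no entry repeats, so every Ajmap increment is 1)
              Btot = 2, Bnnz = 2, Bperm = {0,3}, Bjmap = {0,1,2}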
6246 */
6247 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6248 {
6249   PetscInt    cstart, cend, rstart, rend, row, col;
6250   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6251   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6252   PetscCount  k, m, p, q, r, s, mid;
6253   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6254 
6255   PetscFunctionBegin;
6256   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6257   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6258   m = rend - rstart;
6259 
6260   /* Skip negative rows */
6261   for (k = 0; k < n; k++)
6262     if (i[k] >= 0) break;
6263 
6264   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6265      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6266   */
6267   while (k < n) {
6268     row = i[k];
6269     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6270     for (s = k; s < n; s++)
6271       if (i[s] != row) break;
6272 
6273     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6274     for (p = k; p < s; p++) {
6275       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6276       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6277     }
6278     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6279     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6280     rowBegin[row - rstart] = k;
6281     rowMid[row - rstart]   = mid;
6282     rowEnd[row - rstart]   = s;
6283 
6284     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6285     Atot += mid - k;
6286     Btot += s - mid;
6287 
6288     /* Count unique nonzeros of this diag row */
6289     for (p = k; p < mid;) {
6290       col = j[p];
6291       do {
6292         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6293         p++;
6294       } while (p < mid && j[p] == col);
6295       Annz++;
6296     }
6297 
6298     /* Count unique nonzeros of this offdiag row */
6299     for (p = mid; p < s;) {
6300       col = j[p];
6301       do {
6302         p++;
6303       } while (p < s && j[p] == col);
6304       Bnnz++;
6305     }
6306     k = s;
6307   }
6308 
6309   /* Allocation according to Atot, Btot, Annz, Bnnz */
6310   PetscCall(PetscMalloc1(Atot, &Aperm));
6311   PetscCall(PetscMalloc1(Btot, &Bperm));
6312   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6313   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6314 
6315   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6316   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6317   for (r = 0; r < m; r++) {
6318     k   = rowBegin[r];
6319     mid = rowMid[r];
6320     s   = rowEnd[r];
6321     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6322     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6323     Atot += mid - k;
6324     Btot += s - mid;
6325 
6326     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6327     for (p = k; p < mid;) {
6328       col = j[p];
6329       q   = p;
6330       do {
6331         p++;
6332       } while (p < mid && j[p] == col);
6333       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6334       Annz++;
6335     }
6336 
6337     for (p = mid; p < s;) {
6338       col = j[p];
6339       q   = p;
6340       do {
6341         p++;
6342       } while (p < s && j[p] == col);
6343       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6344       Bnnz++;
6345     }
6346   }
6347   /* Output */
6348   *Aperm_ = Aperm;
6349   *Annz_  = Annz;
6350   *Atot_  = Atot;
6351   *Ajmap_ = Ajmap;
6352   *Bperm_ = Bperm;
6353   *Bnnz_  = Bnnz;
6354   *Btot_  = Btot;
6355   *Bjmap_ = Bjmap;
6356   PetscFunctionReturn(PETSC_SUCCESS);
6357 }
6358 
6359 /*
6360   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6361 
6362   Input Parameters:
6363     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6364     nnz:  number of unique nonzeros in the merged matrix
6365     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6366     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6367 
6368   Output Parameter: (memory is allocated by the caller)
6369     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6370 
6371   Example:
6372     nnz1 = 4
6373     nnz  = 6
6374     imap = [1,3,4,5]
6375     jmap = [0,3,5,6,7]
6376    then,
6377     jmap_new = [0,0,3,3,5,6,7]
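         i.e., merged nonzeros 0 and 2 receive no contribution from this set, merged nonzero 1 receives the 3 repeats of the
         set's nonzero 0 (imap[0] = 1), merged nonzero 3 receives the 2 repeats of the set's nonzero 1 (imap[1] = 3), and so on.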
6378 */
6379 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6380 {
6381   PetscCount k, p;
6382 
6383   PetscFunctionBegin;
6384   jmap_new[0] = 0;
6385   p           = nnz;                /* p loops over jmap_new[] backwards */
6386   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6387     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6388   }
6389   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6390   PetscFunctionReturn(PETSC_SUCCESS);
6391 }
6392 
6393 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
6394 {
6395   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;
6396 
6397   PetscFunctionBegin;
6398   PetscCall(PetscSFDestroy(&coo->sf));
6399   PetscCall(PetscFree(coo->Aperm1));
6400   PetscCall(PetscFree(coo->Bperm1));
6401   PetscCall(PetscFree(coo->Ajmap1));
6402   PetscCall(PetscFree(coo->Bjmap1));
6403   PetscCall(PetscFree(coo->Aimap2));
6404   PetscCall(PetscFree(coo->Bimap2));
6405   PetscCall(PetscFree(coo->Aperm2));
6406   PetscCall(PetscFree(coo->Bperm2));
6407   PetscCall(PetscFree(coo->Ajmap2));
6408   PetscCall(PetscFree(coo->Bjmap2));
6409   PetscCall(PetscFree(coo->Cperm1));
6410   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6411   PetscCall(PetscFree(coo));
6412   PetscFunctionReturn(PETSC_SUCCESS);
6413 }
6414 
6415 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6416 {
6417   MPI_Comm             comm;
6418   PetscMPIInt          rank, size;
6419   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6420   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6421   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6422   PetscContainer       container;
6423   MatCOOStruct_MPIAIJ *coo;
6424 
6425   PetscFunctionBegin;
6426   PetscCall(PetscFree(mpiaij->garray));
6427   PetscCall(VecDestroy(&mpiaij->lvec));
6428 #if defined(PETSC_USE_CTABLE)
6429   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6430 #else
6431   PetscCall(PetscFree(mpiaij->colmap));
6432 #endif
6433   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6434   mat->assembled     = PETSC_FALSE;
6435   mat->was_assembled = PETSC_FALSE;
6436 
6437   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6438   PetscCallMPI(MPI_Comm_size(comm, &size));
6439   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6440   PetscCall(PetscLayoutSetUp(mat->rmap));
6441   PetscCall(PetscLayoutSetUp(mat->cmap));
6442   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6443   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6444   PetscCall(MatGetLocalSize(mat, &m, &n));
6445   PetscCall(MatGetSize(mat, &M, &N));
6446 
6447   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6448   /* entries come first, then local rows, then remote rows.                     */
6449   PetscCount n1 = coo_n, *perm1;
6450   PetscInt  *i1 = coo_i, *j1 = coo_j;
6451 
6452   PetscCall(PetscMalloc1(n1, &perm1));
6453   for (k = 0; k < n1; k++) perm1[k] = k;
6454 
6455   /* Manipulate indices so that entries with negative row or col indices will have smallest
6456      row indices, local entries will have greater but negative row indices, and remote entries
6457      will have positive row indices.
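          For example (an illustrative sketch, assuming this rank owns rows [2,4)): an entry with i = -1 becomes PETSC_INT_MIN,
          an entry with i = 3 becomes 3 - PETSC_INT_MAX (negative), and an entry with i = 7 keeps i = 7, so after the sort below
          the ignored, local and remote groups appear in exactly that order.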
6458   */
6459   for (k = 0; k < n1; k++) {
6460     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6461     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6462     else {
6463       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set, but entries are being inserted into remote rows");
6464       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6465     }
6466   }
6467 
6468   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6469   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6470 
6471   /* Advance k to the first entry we need to take care of */
6472   for (k = 0; k < n1; k++)
6473     if (i1[k] > PETSC_INT_MIN) break;
6474   PetscCount i1start = k;
6475 
6476   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6477   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6478 
6479   /*           Send remote rows to their owner                                  */
6480   /* Find which rows should be sent to which remote ranks*/
6481   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6482   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6483   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6484   const PetscInt *ranges;
6485   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6486 
6487   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6488   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6489   for (k = rem; k < n1;) {
6490     PetscMPIInt owner;
6491     PetscInt    firstRow, lastRow;
6492 
6493     /* Locate a row range */
6494     firstRow = i1[k]; /* first row of this owner */
6495     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6496     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6497 
6498     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6499     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6500 
6501     /* All entries in [k,p) belong to this remote owner */
6502     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6503       PetscMPIInt *sendto2;
6504       PetscInt    *nentries2;
6505       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6506 
6507       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6508       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6509       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6510       PetscCall(PetscFree2(sendto, nentries));
6511       sendto   = sendto2;
6512       nentries = nentries2;
6513       maxNsend = maxNsend2;
6514     }
6515     sendto[nsend] = owner;
6516     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6517     nsend++;
6518     k = p;
6519   }
6520 
6521   /* Build 1st SF to know offsets on remote to send data */
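       /* A sketch of what the fetch-and-add below accomplishes: every leaf of sf1 targets root index 0 on its destination rank,
          so each sender gets back, in offsets[k], a distinct starting offset into that rank's contiguous receive buffer, while
          nroots2 ends up holding the total number of remote entries this rank will receive. */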
6522   PetscSF      sf1;
6523   PetscInt     nroots = 1, nroots2 = 0;
6524   PetscInt     nleaves = nsend, nleaves2 = 0;
6525   PetscInt    *offsets;
6526   PetscSFNode *iremote;
6527 
6528   PetscCall(PetscSFCreate(comm, &sf1));
6529   PetscCall(PetscMalloc1(nsend, &iremote));
6530   PetscCall(PetscMalloc1(nsend, &offsets));
6531   for (k = 0; k < nsend; k++) {
6532     iremote[k].rank  = sendto[k];
6533     iremote[k].index = 0;
6534     nleaves2 += nentries[k];
6535     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6536   }
6537   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6538   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6539   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6540   PetscCall(PetscSFDestroy(&sf1));
6541   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6542 
6543   /* Build 2nd SF to send remote COOs to their owner */
6544   PetscSF sf2;
6545   nroots  = nroots2;
6546   nleaves = nleaves2;
6547   PetscCall(PetscSFCreate(comm, &sf2));
6548   PetscCall(PetscSFSetFromOptions(sf2));
6549   PetscCall(PetscMalloc1(nleaves, &iremote));
6550   p = 0;
6551   for (k = 0; k < nsend; k++) {
6552     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6553     for (q = 0; q < nentries[k]; q++, p++) {
6554       iremote[p].rank = sendto[k];
6555       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6556     }
6557   }
6558   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6559 
6560   /* Send the remote COOs to their owner */
6561   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6562   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6563   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6564   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6565   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6566   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6567   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6568   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6569   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6570   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6571   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6572 
6573   PetscCall(PetscFree(offsets));
6574   PetscCall(PetscFree2(sendto, nentries));
6575 
6576   /* Sort received COOs by row along with the permutation array     */
6577   for (k = 0; k < n2; k++) perm2[k] = k;
6578   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6579 
6580   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6581   PetscCount *Cperm1;
6582   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6583   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6584   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6585   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6586 
6587   /* Support for HYPRE matrices, kind of a hack.
6588      Swap min column with diagonal so that diagonal values will go first */
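       /* Illustrative sketch of the swap: if a local row has diagonal-block entries in columns {5, 7, 9} and its diagonal entry
          is column 9, the column indices 5 and 9 are exchanged below, so the diagonal entry carries the smallest column and
          therefore sorts first. */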
6589   PetscBool hypre;
6590   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6591   if (hypre) {
6592     PetscInt *minj;
6593     PetscBT   hasdiag;
6594 
6595     PetscCall(PetscBTCreate(m, &hasdiag));
6596     PetscCall(PetscMalloc1(m, &minj));
6597     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6598     for (k = i1start; k < rem; k++) {
6599       if (j1[k] < cstart || j1[k] >= cend) continue;
6600       const PetscInt rindex = i1[k] - rstart;
6601       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6602       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6603     }
6604     for (k = 0; k < n2; k++) {
6605       if (j2[k] < cstart || j2[k] >= cend) continue;
6606       const PetscInt rindex = i2[k] - rstart;
6607       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6608       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6609     }
6610     for (k = i1start; k < rem; k++) {
6611       const PetscInt rindex = i1[k] - rstart;
6612       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6613       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6614       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6615     }
6616     for (k = 0; k < n2; k++) {
6617       const PetscInt rindex = i2[k] - rstart;
6618       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6619       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6620       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6621     }
6622     PetscCall(PetscBTDestroy(&hasdiag));
6623     PetscCall(PetscFree(minj));
6624   }
6625 
6626   /* Split local COOs and received COOs into diag/offdiag portions */
6627   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6628   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6629   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6630   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6631   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6632   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6633 
6634   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6635   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6636   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6637   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6638 
6639   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6640   PetscInt *Ai, *Bi;
6641   PetscInt *Aj, *Bj;
6642 
6643   PetscCall(PetscMalloc1(m + 1, &Ai));
6644   PetscCall(PetscMalloc1(m + 1, &Bi));
6645   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6646   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6647 
6648   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6649   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6650   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6651   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6652   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6653 
6654   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6655   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6656 
6657   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6658   /* expect nonzeros in A/B most likely have local contributing entries        */
6659   PetscInt    Annz = Ai[m];
6660   PetscInt    Bnnz = Bi[m];
6661   PetscCount *Ajmap1_new, *Bjmap1_new;
6662 
6663   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6664   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6665 
6666   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6667   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6668 
6669   PetscCall(PetscFree(Aimap1));
6670   PetscCall(PetscFree(Ajmap1));
6671   PetscCall(PetscFree(Bimap1));
6672   PetscCall(PetscFree(Bjmap1));
6673   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6674   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6675   PetscCall(PetscFree(perm1));
6676   PetscCall(PetscFree3(i2, j2, perm2));
6677 
6678   Ajmap1 = Ajmap1_new;
6679   Bjmap1 = Bjmap1_new;
6680 
6681   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6682   if (Annz < Annz1 + Annz2) {
6683     PetscInt *Aj_new;
6684     PetscCall(PetscMalloc1(Annz, &Aj_new));
6685     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6686     PetscCall(PetscFree(Aj));
6687     Aj = Aj_new;
6688   }
6689 
6690   if (Bnnz < Bnnz1 + Bnnz2) {
6691     PetscInt *Bj_new;
6692     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6693     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6694     PetscCall(PetscFree(Bj));
6695     Bj = Bj_new;
6696   }
6697 
6698   /* Create new submatrices for on-process and off-process coupling                  */
6699   PetscScalar     *Aa, *Ba;
6700   MatType          rtype;
6701   Mat_SeqAIJ      *a, *b;
6702   PetscObjectState state;
6703   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6704   PetscCall(PetscCalloc1(Bnnz, &Ba));
6705   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6706   if (cstart) {
6707     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6708   }
6709 
6710   PetscCall(MatGetRootType_Private(mat, &rtype));
6711 
6712   MatSeqXAIJGetOptions_Private(mpiaij->A);
6713   PetscCall(MatDestroy(&mpiaij->A));
6714   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6715   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6716   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6717 
6718   MatSeqXAIJGetOptions_Private(mpiaij->B);
6719   PetscCall(MatDestroy(&mpiaij->B));
6720   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6721   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6722   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6723 
6724   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6725   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6726   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6727   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6728 
6729   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6730   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6731   a->free_a  = PETSC_TRUE;
6732   a->free_ij = PETSC_TRUE;
6733   b->free_a  = PETSC_TRUE;
6734   b->free_ij = PETSC_TRUE;
6735   a->maxnz   = a->nz;
6736   b->maxnz   = b->nz;
6737 
6738   /* conversion must happen AFTER multiply setup */
6739   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6740   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6741   PetscCall(VecDestroy(&mpiaij->lvec));
6742   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6743 
6744   // Put the COO struct in a container and then attach that to the matrix
6745   PetscCall(PetscMalloc1(1, &coo));
6746   coo->n       = coo_n;
6747   coo->sf      = sf2;
6748   coo->sendlen = nleaves;
6749   coo->recvlen = nroots;
6750   coo->Annz    = Annz;
6751   coo->Bnnz    = Bnnz;
6752   coo->Annz2   = Annz2;
6753   coo->Bnnz2   = Bnnz2;
6754   coo->Atot1   = Atot1;
6755   coo->Atot2   = Atot2;
6756   coo->Btot1   = Btot1;
6757   coo->Btot2   = Btot2;
6758   coo->Ajmap1  = Ajmap1;
6759   coo->Aperm1  = Aperm1;
6760   coo->Bjmap1  = Bjmap1;
6761   coo->Bperm1  = Bperm1;
6762   coo->Aimap2  = Aimap2;
6763   coo->Ajmap2  = Ajmap2;
6764   coo->Aperm2  = Aperm2;
6765   coo->Bimap2  = Bimap2;
6766   coo->Bjmap2  = Bjmap2;
6767   coo->Bperm2  = Bperm2;
6768   coo->Cperm1  = Cperm1;
6769   // Allocate in preallocation. If not used, it has zero cost on host
6770   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6771   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6772   PetscCall(PetscContainerSetPointer(container, coo));
6773   PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
6774   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6775   PetscCall(PetscContainerDestroy(&container));
6776   PetscFunctionReturn(PETSC_SUCCESS);
6777 }
6778 
6779 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6780 {
6781   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6782   Mat                  A = mpiaij->A, B = mpiaij->B;
6783   PetscScalar         *Aa, *Ba;
6784   PetscScalar         *sendbuf, *recvbuf;
6785   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6786   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6787   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6788   const PetscCount    *Cperm1;
6789   PetscContainer       container;
6790   MatCOOStruct_MPIAIJ *coo;
6791 
6792   PetscFunctionBegin;
6793   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6794   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
6795   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6796   sendbuf = coo->sendbuf;
6797   recvbuf = coo->recvbuf;
6798   Ajmap1  = coo->Ajmap1;
6799   Ajmap2  = coo->Ajmap2;
6800   Aimap2  = coo->Aimap2;
6801   Bjmap1  = coo->Bjmap1;
6802   Bjmap2  = coo->Bjmap2;
6803   Bimap2  = coo->Bimap2;
6804   Aperm1  = coo->Aperm1;
6805   Aperm2  = coo->Aperm2;
6806   Bperm1  = coo->Bperm1;
6807   Bperm2  = coo->Bperm2;
6808   Cperm1  = coo->Cperm1;
6809 
6810   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6811   PetscCall(MatSeqAIJGetArray(B, &Ba));
6812 
6813   /* Pack entries to be sent to remote */
6814   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6815 
6816   /* Send remote entries to their owner and overlap the communication with local computation */
6817   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6818   /* Add local entries to A and B */
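       /* For example (hypothetical values): if Ajmap1 = [0, 2, 3] and Aperm1 = [5, 9, 7], then the first nonzero of A accumulates
          v[5] + v[9] and the second accumulates v[7], matching the repeat counts recorded at preallocation time. */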
6819   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6820     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6821     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6822     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6823   }
6824   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6825     PetscScalar sum = 0.0;
6826     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6827     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6828   }
6829   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6830 
6831   /* Add received remote entries to A and B */
6832   for (PetscCount i = 0; i < coo->Annz2; i++) {
6833     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6834   }
6835   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6836     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6837   }
6838   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6839   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6840   PetscFunctionReturn(PETSC_SUCCESS);
6841 }
6842 
6843 /*MC
6844    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6845 
6846    Options Database Keys:
6847 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6848 
6849    Level: beginner
6850 
6851    Notes:
6852    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6853     in this case the values associated with the rows and columns one passes in are set to zero
6854     in the matrix.
6855 
6856     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6857     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6858 
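        Example (a minimal sketch of the `NULL`-values usage described above; `A` is assumed to be a preallocated `MATMPIAIJ` matrix):
     .vb
        PetscInt row = 0, cols[2] = {0, 1};
        PetscCall(MatSetValues(A, 1, &row, 2, cols, NULL, INSERT_VALUES)); /* sets entries (0,0) and (0,1) to zero */
        PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
        PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     .ve
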
6859 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6860 M*/
6861 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6862 {
6863   Mat_MPIAIJ *b;
6864   PetscMPIInt size;
6865 
6866   PetscFunctionBegin;
6867   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6868 
6869   PetscCall(PetscNew(&b));
6870   B->data       = (void *)b;
6871   B->ops[0]     = MatOps_Values;
6872   B->assembled  = PETSC_FALSE;
6873   B->insertmode = NOT_SET_VALUES;
6874   b->size       = size;
6875 
6876   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6877 
6878   /* build cache for off array entries formed */
6879   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6880 
6881   b->donotstash  = PETSC_FALSE;
6882   b->colmap      = NULL;
6883   b->garray      = NULL;
6884   b->roworiented = PETSC_TRUE;
6885 
6886   /* stuff used for matrix vector multiply */
6887   b->lvec  = NULL;
6888   b->Mvctx = NULL;
6889 
6890   /* stuff for MatGetRow() */
6891   b->rowindices   = NULL;
6892   b->rowvalues    = NULL;
6893   b->getrowactive = PETSC_FALSE;
6894 
6895   /* flexible pointer used in CUSPARSE classes */
6896   b->spptr = NULL;
6897 
6898   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6899   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6901   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6902   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6904   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6908 #if defined(PETSC_HAVE_CUDA)
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6910 #endif
6911 #if defined(PETSC_HAVE_HIP)
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6913 #endif
6914 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6916 #endif
6917 #if defined(PETSC_HAVE_MKL_SPARSE)
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6919 #endif
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6924 #if defined(PETSC_HAVE_ELEMENTAL)
6925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6926 #endif
6927 #if defined(PETSC_HAVE_SCALAPACK)
6928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6929 #endif
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6932 #if defined(PETSC_HAVE_HYPRE)
6933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6935 #endif
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6940   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6941   PetscFunctionReturn(PETSC_SUCCESS);
6942 }
6943 
6944 /*@
6945   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6946   and "off-diagonal" part of the matrix in CSR format.
6947 
6948   Collective
6949 
6950   Input Parameters:
6951 + comm - MPI communicator
6952 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6953 . n    - This value should be the same as the local size used in creating the
6954          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
6955          calculated if `N` is given) For square matrices `n` is almost always `m`.
6956 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
6957 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
6958 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6959 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6960 . a    - matrix values
6961 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6962 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6963 - oa   - matrix values
6964 
6965   Output Parameter:
6966 . mat - the matrix
6967 
6968   Level: advanced
6969 
6970   Notes:
6971   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6972   must free the arrays once the matrix has been destroyed and not before.
6973 
6974   The `i` and `j` indices are 0 based
6975 
6976   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6977 
6978   This sets local rows and cannot be used to set off-processor values.
6979 
6980   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6981   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6982   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6983   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6984   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6985   communication if it is known that only local entries will be set.
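
       Example usage (a minimal sketch, not taken from the PETSc examples; assumes a 2-rank run in which this rank owns rows 0-1 and
       columns 0-1 of a 4x4 matrix, with each rank building the analogous arrays for its own rows and keeping them alive until the
       matrix is destroyed):
     .vb
       PetscInt    i[3]  = {0, 1, 2}, j[2]  = {0, 1}; /* diagonal block: entries (0,0) and (1,1), local column indices */
       PetscScalar a[2]  = {1.0, 2.0};
       PetscInt    oi[3] = {0, 1, 1}, oj[1] = {2};    /* off-diagonal block: entry (0,2) only, global column indices   */
       PetscScalar oa[1] = {3.0};
       Mat         A;

       PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
     .ve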
6986 
6987 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6988           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6989 @*/
6990 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6991 {
6992   Mat_MPIAIJ *maij;
6993 
6994   PetscFunctionBegin;
6995   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6996   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6997   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6998   PetscCall(MatCreate(comm, mat));
6999   PetscCall(MatSetSizes(*mat, m, n, M, N));
7000   PetscCall(MatSetType(*mat, MATMPIAIJ));
7001   maij = (Mat_MPIAIJ *)(*mat)->data;
7002 
7003   (*mat)->preallocated = PETSC_TRUE;
7004 
7005   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7006   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7007 
7008   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7009   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7010 
7011   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7012   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7013   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7014   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7015   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7016   PetscFunctionReturn(PETSC_SUCCESS);
7017 }
7018 
7019 typedef struct {
7020   Mat       *mp;    /* intermediate products */
7021   PetscBool *mptmp; /* is the intermediate product temporary ? */
7022   PetscInt   cp;    /* number of intermediate products */
7023 
7024   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7025   PetscInt    *startsj_s, *startsj_r;
7026   PetscScalar *bufa;
7027   Mat          P_oth;
7028 
7029   /* may take advantage of merging product->B */
7030   Mat Bloc; /* B-local by merging diag and off-diag */
7031 
7032   /* cusparse does not have support to split between symbolic and numeric phases.
7033      When api_user is true, we don't need to update the numerical values
7034      of the temporary storage */
7035   PetscBool reusesym;
7036 
7037   /* support for COO values insertion */
7038   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7039   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7040   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7041   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7042   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7043   PetscMemType mtype;
7044 
7045   /* customization */
7046   PetscBool abmerge;
7047   PetscBool P_oth_bind;
7048 } MatMatMPIAIJBACKEND;
7049 
7050 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7051 {
7052   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7053   PetscInt             i;
7054 
7055   PetscFunctionBegin;
7056   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7057   PetscCall(PetscFree(mmdata->bufa));
7058   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7059   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7060   PetscCall(MatDestroy(&mmdata->P_oth));
7061   PetscCall(MatDestroy(&mmdata->Bloc));
7062   PetscCall(PetscSFDestroy(&mmdata->sf));
7063   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7064   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7065   PetscCall(PetscFree(mmdata->own[0]));
7066   PetscCall(PetscFree(mmdata->own));
7067   PetscCall(PetscFree(mmdata->off[0]));
7068   PetscCall(PetscFree(mmdata->off));
7069   PetscCall(PetscFree(mmdata));
7070   PetscFunctionReturn(PETSC_SUCCESS);
7071 }
7072 
7073 /* Copy selected n entries with indices in idx[] of A to v[].
7074    If idx is NULL, copy the whole data array of A to v[]
7075  */
7076 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7077 {
7078   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7079 
7080   PetscFunctionBegin;
7081   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7082   if (f) {
7083     PetscCall((*f)(A, n, idx, v));
7084   } else {
7085     const PetscScalar *vv;
7086 
7087     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7088     if (n && idx) {
7089       PetscScalar    *w  = v;
7090       const PetscInt *oi = idx;
7091       PetscInt        j;
7092 
7093       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7094     } else {
7095       PetscCall(PetscArraycpy(v, vv, n));
7096     }
7097     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7098   }
7099   PetscFunctionReturn(PETSC_SUCCESS);
7100 }
7101 
7102 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7103 {
7104   MatMatMPIAIJBACKEND *mmdata;
7105   PetscInt             i, n_d, n_o;
7106 
7107   PetscFunctionBegin;
7108   MatCheckProduct(C, 1);
7109   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7110   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7111   if (!mmdata->reusesym) { /* update temporary matrices */
7112     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7113     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7114   }
7115   mmdata->reusesym = PETSC_FALSE;
7116 
7117   for (i = 0; i < mmdata->cp; i++) {
7118     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7119     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7120   }
7121   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7122     PetscInt noff;
7123 
7124     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7125     if (mmdata->mptmp[i]) continue;
7126     if (noff) {
7127       PetscInt nown;
7128 
7129       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7130       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7131       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7132       n_o += noff;
7133       n_d += nown;
7134     } else {
7135       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7136 
7137       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7138       n_d += mm->nz;
7139     }
7140   }
7141   if (mmdata->hasoffproc) { /* offprocess insertion */
7142     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7143     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7144   }
7145   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7146   PetscFunctionReturn(PETSC_SUCCESS);
7147 }
7148 
7149 /* Support for Pt * A, A * P, or Pt * A * P */
7150 #define MAX_NUMBER_INTERMEDIATE 4
7151 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7152 {
7153   Mat_Product           *product = C->product;
7154   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7155   Mat_MPIAIJ            *a, *p;
7156   MatMatMPIAIJBACKEND   *mmdata;
7157   ISLocalToGlobalMapping P_oth_l2g = NULL;
7158   IS                     glob      = NULL;
7159   const char            *prefix;
7160   char                   pprefix[256];
7161   const PetscInt        *globidx, *P_oth_idx;
7162   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7163   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7164   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7165                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7166                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7167   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
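       /* Illustrative reading of the map types above: with rmapt[i] = 1, local row r of mp[i] contributes to a contiguous block of
          global rows of C starting at some base offset, whereas with rmapt[i] = 2 it contributes to global row rmapa[i][r]; columns
          are handled analogously through cmapt[]/cmapa[]. */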
7168 
7169   MatProductType ptype;
7170   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7171   PetscMPIInt    size;
7172 
7173   PetscFunctionBegin;
7174   MatCheckProduct(C, 1);
7175   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7176   ptype = product->type;
7177   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7178     ptype                                          = MATPRODUCT_AB;
7179     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7180   }
7181   switch (ptype) {
7182   case MATPRODUCT_AB:
7183     A          = product->A;
7184     P          = product->B;
7185     m          = A->rmap->n;
7186     n          = P->cmap->n;
7187     M          = A->rmap->N;
7188     N          = P->cmap->N;
7189     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7190     break;
7191   case MATPRODUCT_AtB:
7192     P          = product->A;
7193     A          = product->B;
7194     m          = P->cmap->n;
7195     n          = A->cmap->n;
7196     M          = P->cmap->N;
7197     N          = A->cmap->N;
7198     hasoffproc = PETSC_TRUE;
7199     break;
7200   case MATPRODUCT_PtAP:
7201     A          = product->A;
7202     P          = product->B;
7203     m          = P->cmap->n;
7204     n          = P->cmap->n;
7205     M          = P->cmap->N;
7206     N          = P->cmap->N;
7207     hasoffproc = PETSC_TRUE;
7208     break;
7209   default:
7210     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7211   }
7212   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7213   if (size == 1) hasoffproc = PETSC_FALSE;
7214 
7215   /* defaults */
7216   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7217     mp[i]    = NULL;
7218     mptmp[i] = PETSC_FALSE;
7219     rmapt[i] = -1;
7220     cmapt[i] = -1;
7221     rmapa[i] = NULL;
7222     cmapa[i] = NULL;
7223   }
7224 
7225   /* customization */
7226   PetscCall(PetscNew(&mmdata));
7227   mmdata->reusesym = product->api_user;
7228   if (ptype == MATPRODUCT_AB) {
7229     if (product->api_user) {
7230       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7231       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7232       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7233       PetscOptionsEnd();
7234     } else {
7235       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7236       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7237       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7238       PetscOptionsEnd();
7239     }
7240   } else if (ptype == MATPRODUCT_PtAP) {
7241     if (product->api_user) {
7242       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7243       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7244       PetscOptionsEnd();
7245     } else {
7246       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7247       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7248       PetscOptionsEnd();
7249     }
7250   }
7251   a = (Mat_MPIAIJ *)A->data;
7252   p = (Mat_MPIAIJ *)P->data;
7253   PetscCall(MatSetSizes(C, m, n, M, N));
7254   PetscCall(PetscLayoutSetUp(C->rmap));
7255   PetscCall(PetscLayoutSetUp(C->cmap));
7256   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7257   PetscCall(MatGetOptionsPrefix(C, &prefix));
7258 
7259   cp = 0;
7260   switch (ptype) {
7261   case MATPRODUCT_AB: /* A * P */
7262     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7263 
7264     /* A_diag * P_local (merged or not) */
7265     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7266       /* P is product->B */
7267       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7268       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7269       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7270       PetscCall(MatProductSetFill(mp[cp], product->fill));
7271       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7272       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7273       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7274       mp[cp]->product->api_user = product->api_user;
7275       PetscCall(MatProductSetFromOptions(mp[cp]));
7276       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7277       PetscCall(ISGetIndices(glob, &globidx));
7278       rmapt[cp] = 1;
7279       cmapt[cp] = 2;
7280       cmapa[cp] = globidx;
7281       mptmp[cp] = PETSC_FALSE;
7282       cp++;
7283     } else { /* A_diag * P_diag and A_diag * P_off */
7284       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7285       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7286       PetscCall(MatProductSetFill(mp[cp], product->fill));
7287       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7288       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7289       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7290       mp[cp]->product->api_user = product->api_user;
7291       PetscCall(MatProductSetFromOptions(mp[cp]));
7292       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7293       rmapt[cp] = 1;
7294       cmapt[cp] = 1;
7295       mptmp[cp] = PETSC_FALSE;
7296       cp++;
7297       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7298       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7299       PetscCall(MatProductSetFill(mp[cp], product->fill));
7300       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7301       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7302       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7303       mp[cp]->product->api_user = product->api_user;
7304       PetscCall(MatProductSetFromOptions(mp[cp]));
7305       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7306       rmapt[cp] = 1;
7307       cmapt[cp] = 2;
7308       cmapa[cp] = p->garray;
7309       mptmp[cp] = PETSC_FALSE;
7310       cp++;
7311     }
7312 
7313     /* A_off * P_other */
7314     if (mmdata->P_oth) {
7315       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7316       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7317       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7318       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7319       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7320       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7321       PetscCall(MatProductSetFill(mp[cp], product->fill));
7322       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7323       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7324       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7325       mp[cp]->product->api_user = product->api_user;
7326       PetscCall(MatProductSetFromOptions(mp[cp]));
7327       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7328       rmapt[cp] = 1;
7329       cmapt[cp] = 2;
7330       cmapa[cp] = P_oth_idx;
7331       mptmp[cp] = PETSC_FALSE;
7332       cp++;
7333     }
7334     break;
7335 
7336   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7337     /* A is product->B */
7338     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7339     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7340       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7341       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7342       PetscCall(MatProductSetFill(mp[cp], product->fill));
7343       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7344       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7345       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7346       mp[cp]->product->api_user = product->api_user;
7347       PetscCall(MatProductSetFromOptions(mp[cp]));
7348       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7349       PetscCall(ISGetIndices(glob, &globidx));
7350       rmapt[cp] = 2;
7351       rmapa[cp] = globidx;
7352       cmapt[cp] = 2;
7353       cmapa[cp] = globidx;
7354       mptmp[cp] = PETSC_FALSE;
7355       cp++;
7356     } else {
7357       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7358       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7359       PetscCall(MatProductSetFill(mp[cp], product->fill));
7360       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7361       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7362       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7363       mp[cp]->product->api_user = product->api_user;
7364       PetscCall(MatProductSetFromOptions(mp[cp]));
7365       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7366       PetscCall(ISGetIndices(glob, &globidx));
7367       rmapt[cp] = 1;
7368       cmapt[cp] = 2;
7369       cmapa[cp] = globidx;
7370       mptmp[cp] = PETSC_FALSE;
7371       cp++;
7372       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7373       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7374       PetscCall(MatProductSetFill(mp[cp], product->fill));
7375       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7376       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7377       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7378       mp[cp]->product->api_user = product->api_user;
7379       PetscCall(MatProductSetFromOptions(mp[cp]));
7380       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7381       rmapt[cp] = 2;
7382       rmapa[cp] = p->garray;
7383       cmapt[cp] = 2;
7384       cmapa[cp] = globidx;
7385       mptmp[cp] = PETSC_FALSE;
7386       cp++;
7387     }
7388     break;
7389   case MATPRODUCT_PtAP:
7390     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7391     /* P is product->B */
7392     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7393     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7394     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7395     PetscCall(MatProductSetFill(mp[cp], product->fill));
7396     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7397     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7398     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7399     mp[cp]->product->api_user = product->api_user;
7400     PetscCall(MatProductSetFromOptions(mp[cp]));
7401     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7402     PetscCall(ISGetIndices(glob, &globidx));
7403     rmapt[cp] = 2;
7404     rmapa[cp] = globidx;
7405     cmapt[cp] = 2;
7406     cmapa[cp] = globidx;
7407     mptmp[cp] = PETSC_FALSE;
7408     cp++;
7409     if (mmdata->P_oth) {
7410       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7411       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7412       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7413       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7414       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7415       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7416       PetscCall(MatProductSetFill(mp[cp], product->fill));
7417       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7418       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7419       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7420       mp[cp]->product->api_user = product->api_user;
7421       PetscCall(MatProductSetFromOptions(mp[cp]));
7422       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7423       mptmp[cp] = PETSC_TRUE;
7424       cp++;
7425       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7426       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7427       PetscCall(MatProductSetFill(mp[cp], product->fill));
7428       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7429       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7430       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7431       mp[cp]->product->api_user = product->api_user;
7432       PetscCall(MatProductSetFromOptions(mp[cp]));
7433       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7434       rmapt[cp] = 2;
7435       rmapa[cp] = globidx;
7436       cmapt[cp] = 2;
7437       cmapa[cp] = P_oth_idx;
7438       mptmp[cp] = PETSC_FALSE;
7439       cp++;
7440     }
7441     break;
7442   default:
7443     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7444   }
7445   /* sanity check */
7446   if (size > 1)
7447     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7448 
7449   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7450   for (i = 0; i < cp; i++) {
7451     mmdata->mp[i]    = mp[i];
7452     mmdata->mptmp[i] = mptmp[i];
7453   }
7454   mmdata->cp             = cp;
7455   C->product->data       = mmdata;
7456   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7457   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7458 
7459   /* memory type */
7460   mmdata->mtype = PETSC_MEMTYPE_HOST;
7461   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7462   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7463   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7464   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7465   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7466   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7467 
7468   /* prepare coo coordinates for values insertion */
7469 
7470   /* count total nonzeros of those intermediate seqaij Mats
7471     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7472     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7473     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7474   */
7475   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7476     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7477     if (mptmp[cp]) continue;
7478     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7479       const PetscInt *rmap = rmapa[cp];
7480       const PetscInt  mr   = mp[cp]->rmap->n;
7481       const PetscInt  rs   = C->rmap->rstart;
7482       const PetscInt  re   = C->rmap->rend;
7483       const PetscInt *ii   = mm->i;
7484       for (i = 0; i < mr; i++) {
7485         const PetscInt gr = rmap[i];
7486         const PetscInt nz = ii[i + 1] - ii[i];
7487         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7488         else ncoo_oown += nz;                  /* this row is local */
7489       }
7490     } else ncoo_d += mm->nz;
7491   }
7492 
7493   /*
7494     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7495 
7496     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this process by other processes.
7497 
7498     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7499 
7500     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other processes
7501     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7502     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other processes.
7503 
7504     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7505     For example, coo_i[]: the first ncoo_d + ncoo_oown entries store the row indices of local nonzeros, and the remaining entries store the row indices of nonzeros this process will receive.
7506   */
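  /*
    A small illustration with made-up numbers: suppose this rank owns rows [0,2) of C and there are two
    non-temporary products, mp[0] with rmapt[0] = 1 (its local rows coincide with the owned rows of C) and
    mp[1] with rmapt[1] = 2 and rmapa[1] = {1, 5}. Every nonzero of mp[0] is local, so it is counted in ncoo_d.
    For mp[1], the nonzeros of its local row 0 (global row 1, owned here) are counted in ncoo_oown and their
    positions in mp[1]'s value array are recorded in the own[1] segment, while the nonzeros of its local row 1
    (global row 5, owned elsewhere) are counted in ncoo_o, recorded in the off[1] segment, and their global (i,j)
    are shipped to the owner of row 5. coo_i/j then hold the ncoo_d + ncoo_oown local entries first, followed by
    the ncoo2 entries received from other ranks.
  */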
7507   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7508   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7509 
7510   /* gather (i,j) of nonzeros inserted by remote procs */
7511   if (hasoffproc) {
7512     PetscSF  msf;
7513     PetscInt ncoo2, *coo_i2, *coo_j2;
7514 
7515     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7516     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7517     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7518 
7519     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7520       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7521       PetscInt   *idxoff = mmdata->off[cp];
7522       PetscInt   *idxown = mmdata->own[cp];
7523       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7524         const PetscInt *rmap = rmapa[cp];
7525         const PetscInt *cmap = cmapa[cp];
7526         const PetscInt *ii   = mm->i;
7527         PetscInt       *coi  = coo_i + ncoo_o;
7528         PetscInt       *coj  = coo_j + ncoo_o;
7529         const PetscInt  mr   = mp[cp]->rmap->n;
7530         const PetscInt  rs   = C->rmap->rstart;
7531         const PetscInt  re   = C->rmap->rend;
7532         const PetscInt  cs   = C->cmap->rstart;
7533         for (i = 0; i < mr; i++) {
7534           const PetscInt *jj = mm->j + ii[i];
7535           const PetscInt  gr = rmap[i];
7536           const PetscInt  nz = ii[i + 1] - ii[i];
7537           if (gr < rs || gr >= re) { /* this is an offproc row */
7538             for (j = ii[i]; j < ii[i + 1]; j++) {
7539               *coi++    = gr;
7540               *idxoff++ = j;
7541             }
7542             if (!cmapt[cp]) { /* already global */
7543               for (j = 0; j < nz; j++) *coj++ = jj[j];
7544             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7545               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7546             } else { /* offdiag */
7547               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7548             }
7549             ncoo_o += nz;
7550           } else { /* this is a local row */
7551             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7552           }
7553         }
7554       }
7555       mmdata->off[cp + 1] = idxoff;
7556       mmdata->own[cp + 1] = idxown;
7557     }
7558 
7559     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7560     PetscInt incoo_o;
7561     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7562     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7563     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7564     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7565     ncoo = ncoo_d + ncoo_oown + ncoo2;
7566     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7567     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7568     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7569     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7570     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7571     PetscCall(PetscFree2(coo_i, coo_j));
7572     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7573     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7574     coo_i = coo_i2;
7575     coo_j = coo_j2;
7576   } else { /* no offproc values insertion */
7577     ncoo = ncoo_d;
7578     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7579 
7580     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7581     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7582     PetscCall(PetscSFSetUp(mmdata->sf));
7583   }
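  /*
    In the hasoffproc branch above, each off-process nonzero is a leaf of mmdata->sf and is routed to the rank
    owning the corresponding row of C (PetscSFSetGraphLayout() over C->rmap); the multi-SF provides one root slot
    per incoming leaf, which is why its number of roots gives ncoo2, the count of entries this rank will receive.
    The (i,j) pairs are moved only once here; the star forest itself is kept in mmdata so that the numerical
    values, staged in mmdata->coo_w, can be moved with the same pattern during the numeric phase.
  */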
7584   mmdata->hasoffproc = hasoffproc;
7585 
7586   /* gather (i,j) of nonzeros inserted locally */
7587   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7588     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7589     PetscInt       *coi  = coo_i + ncoo_d;
7590     PetscInt       *coj  = coo_j + ncoo_d;
7591     const PetscInt *jj   = mm->j;
7592     const PetscInt *ii   = mm->i;
7593     const PetscInt *cmap = cmapa[cp];
7594     const PetscInt *rmap = rmapa[cp];
7595     const PetscInt  mr   = mp[cp]->rmap->n;
7596     const PetscInt  rs   = C->rmap->rstart;
7597     const PetscInt  re   = C->rmap->rend;
7598     const PetscInt  cs   = C->cmap->rstart;
7599 
7600     if (mptmp[cp]) continue;
7601     if (rmapt[cp] == 1) { /* consecutive rows */
7602       /* fill coo_i */
7603       for (i = 0; i < mr; i++) {
7604         const PetscInt gr = i + rs;
7605         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7606       }
7607       /* fill coo_j */
7608       if (!cmapt[cp]) { /* type-0, already global */
7609         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7610       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7611         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7612       } else {                                            /* type-2, local to global for sparse columns */
7613         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7614       }
7615       ncoo_d += mm->nz;
7616     } else if (rmapt[cp] == 2) { /* sparse rows */
7617       for (i = 0; i < mr; i++) {
7618         const PetscInt *jj = mm->j + ii[i];
7619         const PetscInt  gr = rmap[i];
7620         const PetscInt  nz = ii[i + 1] - ii[i];
7621         if (gr >= rs && gr < re) { /* local rows */
7622           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7623           if (!cmapt[cp]) { /* type-0, already global */
7624             for (j = 0; j < nz; j++) *coj++ = jj[j];
7625           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7626             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7627           } else { /* type-2, local to global for sparse columns */
7628             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7629           }
7630           ncoo_d += nz;
7631         }
7632       }
7633     }
7634   }
7635   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7636   PetscCall(ISDestroy(&glob));
7637   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7638   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7639   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7640   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7641 
7642   /* preallocate with COO data */
7643   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7644   PetscCall(PetscFree2(coo_i, coo_j));
7645   PetscFunctionReturn(PETSC_SUCCESS);
7646 }
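/*
   MatSetPreallocationCOO() above only fixes the nonzero pattern of C; the values are supplied later by the numeric
   phase (MatProductNumeric_MPIAIJBACKEND(), installed above), which fills mmdata->coo_v and hands it to
   MatSetValuesCOO(). In general the two calls pair up as in this sketch (ncoo, coo_i/coo_j and v are placeholders):

     PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); // pattern; repeated (i,j) entries are allowed and summed
     // ... compute the ncoo values v[] in the same order as coo_i/coo_j ...
     PetscCall(MatSetValuesCOO(C, v, INSERT_VALUES));
*/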
7647 
7648 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7649 {
7650   Mat_Product *product = mat->product;
7651 #if defined(PETSC_HAVE_DEVICE)
7652   PetscBool match  = PETSC_FALSE;
7653   PetscBool usecpu = PETSC_FALSE;
7654 #else
7655   PetscBool match = PETSC_TRUE;
7656 #endif
7657 
7658   PetscFunctionBegin;
7659   MatCheckProduct(mat, 1);
7660 #if defined(PETSC_HAVE_DEVICE)
7661   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7662   if (match) { /* we can always fall back to the CPU if requested */
7663     switch (product->type) {
7664     case MATPRODUCT_AB:
7665       if (product->api_user) {
7666         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7667         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7668         PetscOptionsEnd();
7669       } else {
7670         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7671         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7672         PetscOptionsEnd();
7673       }
7674       break;
7675     case MATPRODUCT_AtB:
7676       if (product->api_user) {
7677         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7678         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7679         PetscOptionsEnd();
7680       } else {
7681         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7682         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7683         PetscOptionsEnd();
7684       }
7685       break;
7686     case MATPRODUCT_PtAP:
7687       if (product->api_user) {
7688         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7689         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7690         PetscOptionsEnd();
7691       } else {
7692         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7693         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7694         PetscOptionsEnd();
7695       }
7696       break;
7697     default:
7698       break;
7699     }
7700     match = (PetscBool)!usecpu;
7701   }
7702 #endif
7703   if (match) {
7704     switch (product->type) {
7705     case MATPRODUCT_AB:
7706     case MATPRODUCT_AtB:
7707     case MATPRODUCT_PtAP:
7708       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7709       break;
7710     default:
7711       break;
7712     }
7713   }
7714   /* fallback to MPIAIJ ops */
7715   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7716   PetscFunctionReturn(PETSC_SUCCESS);
7717 }
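/*
   For example, an application that calls MatMatMult() on device matrices can force the CPU fallback handled above
   from the command line (a sketch; the executable name and matrix type are placeholders):

     ./app -mat_type aijcusparse -matmatmult_backend_cpu

   while products configured through the MatProduct interface use -mat_product_algorithm_backend_cpu instead.
*/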
7718 
7719 /*
7720    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7721 
7722    n - the number of block indices in cc[]
7723    cc - the block indices (must be large enough to contain the indices)
7724 */
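/* For illustration: with bs = 2, a row whose nonzero columns are {0, 1, 4, 5, 8} collapses to the block columns
   {0, 2, 4} (so *n = 3); the column indices are assumed to be sorted, as MatGetRow() returns them for AIJ matrices. */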
7725 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7726 {
7727   PetscInt        cnt = -1, nidx, j;
7728   const PetscInt *idx;
7729 
7730   PetscFunctionBegin;
7731   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7732   if (nidx) {
7733     cnt     = 0;
7734     cc[cnt] = idx[0] / bs;
7735     for (j = 1; j < nidx; j++) {
7736       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7737     }
7738   }
7739   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7740   *n = cnt + 1;
7741   PetscFunctionReturn(PETSC_SUCCESS);
7742 }
7743 
7744 /*
7745     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7746 
7747     ncollapsed - the number of block indices
7748     collapsed - the block indices (must be large enough to contain the indices)
7749 */
7750 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7751 {
7752   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7753 
7754   PetscFunctionBegin;
7755   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7756   for (i = start + 1; i < start + bs; i++) {
7757     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7758     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7759     cprevtmp = cprev;
7760     cprev    = merged;
7761     merged   = cprevtmp;
7762   }
7763   *ncollapsed = nprev;
7764   if (collapsed) *collapsed = cprev;
7765   PetscFunctionReturn(PETSC_SUCCESS);
7766 }
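/* For illustration: with bs = 2 and start = 0, if row 0 collapses to block columns {0, 2} and row 1 to {1, 2}
   (see MatCollapseRow() above), the merged result is *ncollapsed = 3 with collapsed = {0, 1, 2}. */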
7767 
7768 /*
7769  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7770 
7771  Input Parameters:
7772  + Amat - matrix
7773  . symmetrize - make the result symmetric
7774  - scale - scale with diagonal
7775 
7776  Output Parameter:
7777  . a_Gmat - output scalar graph >= 0
7778 
7779 */
7780 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7781 {
7782   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7783   MPI_Comm  comm;
7784   Mat       Gmat;
7785   PetscBool ismpiaij, isseqaij;
7786   Mat       a, b, c;
7787   MatType   jtype;
7788 
7789   PetscFunctionBegin;
7790   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7791   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7792   PetscCall(MatGetSize(Amat, &MM, &NN));
7793   PetscCall(MatGetBlockSize(Amat, &bs));
7794   nloc = (Iend - Istart) / bs;
7795 
7796   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7797   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7798   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7799 
7800   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7801   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class can provide a fast
7802      implementation */
7803   if (bs > 1) {
7804     PetscCall(MatGetType(Amat, &jtype));
7805     PetscCall(MatCreate(comm, &Gmat));
7806     PetscCall(MatSetType(Gmat, jtype));
7807     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7808     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7809     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7810       PetscInt  *d_nnz, *o_nnz;
7811       MatScalar *aa, val, *AA;
7812       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7813 
7814       if (isseqaij) {
7815         a = Amat;
7816         b = NULL;
7817       } else {
7818         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7819         a             = d->A;
7820         b             = d->B;
7821       }
7822       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7823       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7824       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7825         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7826         const PetscInt *cols1, *cols2;
7827 
7828         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7829           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7830           nnz[brow / bs] = nc2 / bs;
7831           if (nc2 % bs) ok = 0;
7832           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7833           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7834             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7835             if (nc1 != nc2) ok = 0;
7836             else {
7837               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7838                 if (cols1[jj] != cols2[jj]) ok = 0;
7839                 if (cols1[jj] % bs != jj % bs) ok = 0;
7840               }
7841             }
7842             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7843           }
7844           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7845           if (!ok) {
7846             PetscCall(PetscFree2(d_nnz, o_nnz));
7847             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7848             goto old_bs;
7849           }
7850         }
7851       }
7852       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7853       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7854       PetscCall(PetscFree2(d_nnz, o_nnz));
7855       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7856       // diag
7857       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7858         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7859 
7860         ai = aseq->i;
7861         n  = ai[brow + 1] - ai[brow];
7862         aj = aseq->j + ai[brow];
7863         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7864           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7865           val        = 0;
7866           if (index_size == 0) {
7867             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7868               aa = aseq->a + ai[brow + ii] + k;
7869               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7870                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7871               }
7872             }
7873           } else {                                            // use (index,index) value if provided
7874             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7875               PetscInt ii = index[iii];
7876               aa          = aseq->a + ai[brow + ii] + k;
7877               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7878                 PetscInt jj = index[jjj];
7879                 val += PetscAbs(PetscRealPart(aa[jj]));
7880               }
7881             }
7882           }
7883           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7884           AA[k / bs] = val;
7885         }
7886         grow = Istart / bs + brow / bs;
7887         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7888       }
7889       // off-diag
7890       if (ismpiaij) {
7891         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7892         const PetscScalar *vals;
7893         const PetscInt    *cols, *garray = aij->garray;
7894 
7895         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7896         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7897           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7898           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7899             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7900             AA[k / bs] = 0;
7901             AJ[cidx]   = garray[cols[k]] / bs;
7902           }
7903           nc = ncols / bs;
7904           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7905           if (index_size == 0) {
7906             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7907               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7908               for (PetscInt k = 0; k < ncols; k += bs) {
7909                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7910                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7911                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7912                 }
7913               }
7914               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7915             }
7916           } else {                                            // use (index,index) value if provided
7917             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7918               PetscInt ii = index[iii];
7919               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7920               for (PetscInt k = 0; k < ncols; k += bs) {
7921                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7922                   PetscInt jj = index[jjj];
7923                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7924                 }
7925               }
7926               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7927             }
7928           }
7929           grow = Istart / bs + brow / bs;
7930           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7931         }
7932       }
7933       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7934       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7935       PetscCall(PetscFree2(AA, AJ));
7936     } else {
7937       const PetscScalar *vals;
7938       const PetscInt    *idx;
7939       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7940     old_bs:
7941       /*
7942        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7943        */
7944       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7945       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7946       if (isseqaij) {
7947         PetscInt max_d_nnz;
7948 
7949         /*
7950          Determine exact preallocation count for (sequential) scalar matrix
7951          */
7952         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7953         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7954         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7955         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7956         PetscCall(PetscFree3(w0, w1, w2));
7957       } else if (ismpiaij) {
7958         Mat             Daij, Oaij;
7959         const PetscInt *garray;
7960         PetscInt        max_d_nnz;
7961 
7962         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7963         /*
7964          Determine exact preallocation count for diagonal block portion of scalar matrix
7965          */
7966         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7967         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7968         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7969         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7970         PetscCall(PetscFree3(w0, w1, w2));
7971         /*
7972          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7973          */
7974         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7975           o_nnz[jj] = 0;
7976           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7977             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7978             o_nnz[jj] += ncols;
7979             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7980           }
7981           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7982         }
7983       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7984       /* get scalar copy (norms) of matrix */
7985       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7986       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7987       PetscCall(PetscFree2(d_nnz, o_nnz));
7988       for (Ii = Istart; Ii < Iend; Ii++) {
7989         PetscInt dest_row = Ii / bs;
7990 
7991         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7992         for (jj = 0; jj < ncols; jj++) {
7993           PetscInt    dest_col = idx[jj] / bs;
7994           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7995 
7996           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7997         }
7998         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7999       }
8000       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8001       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8002     }
8003   } else {
8004     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8005     else {
8006       Gmat = Amat;
8007       PetscCall(PetscObjectReference((PetscObject)Gmat));
8008     }
8009     if (isseqaij) {
8010       a = Gmat;
8011       b = NULL;
8012     } else {
8013       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8014       a             = d->A;
8015       b             = d->B;
8016     }
8017     if (filter >= 0 || scale) {
8018       /* take absolute value of each entry */
8019       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8020         MatInfo      info;
8021         PetscScalar *avals;
8022 
8023         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8024         PetscCall(MatSeqAIJGetArray(c, &avals));
8025         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8026         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8027       }
8028     }
8029   }
8030   if (symmetrize) {
8031     PetscBool isset, issym;
8032 
8033     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8034     if (!isset || !issym) {
8035       Mat matTrans;
8036 
8037       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8038       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8039       PetscCall(MatDestroy(&matTrans));
8040     }
8041     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8042   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8043   if (scale) {
8044     /* scale Gmat so all diagonal entries become +1 or -1, i.e. Gmat <- D^{-1/2} Gmat D^{-1/2} with D = |diag(Gmat)| */
8045     Vec diag;
8046 
8047     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8048     PetscCall(MatGetDiagonal(Gmat, diag));
8049     PetscCall(VecReciprocal(diag));
8050     PetscCall(VecSqrtAbs(diag));
8051     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8052     PetscCall(VecDestroy(&diag));
8053   }
8054   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8055   if (filter >= 0) {
8056     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8057     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8058   }
8059   *a_Gmat = Gmat;
8060   PetscFunctionReturn(PETSC_SUCCESS);
8061 }
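/*
   A minimal usage sketch (hypothetical caller; Amat is an assembled (MPI)AIJ matrix whose block size evenly divides
   its local row range):

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, 0.0, 0, NULL, &G));
     // each entry of G collapses one bs x bs block of Amat to the sum of |a_ij| over that block (or only over the
     // rows/columns listed in index[] when index_size > 0); G is then symmetrized, diagonally scaled so its
     // diagonal is +-1, and filtered with tolerance 0.0
     PetscCall(MatDestroy(&G));
*/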
8062 
8063 /*
8064     Special version for direct calls from Fortran
8065 */
8066 
8067 /* Change these macros so they can be used in void functions */
8068 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8069 #undef PetscCall
8070 #define PetscCall(...) \
8071   do { \
8072     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8073     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8074       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8075       return; \
8076     } \
8077   } while (0)
8078 
8079 #undef SETERRQ
8080 #define SETERRQ(comm, ierr, ...) \
8081   do { \
8082     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8083     return; \
8084   } while (0)
8085 
8086 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8087   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8088 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8089   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8090 #else
8091 #endif
8092 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8093 {
8094   Mat         mat = *mmat;
8095   PetscInt    m = *mm, n = *mn;
8096   InsertMode  addv = *maddv;
8097   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8098   PetscScalar value;
8099 
8100   MatCheckPreallocated(mat, 1);
8101   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8102   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8103   {
8104     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8105     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8106     PetscBool roworiented = aij->roworiented;
8107 
8108     /* Some Variables required in the macro */
8109     Mat         A     = aij->A;
8110     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8111     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8112     MatScalar  *aa;
8113     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8114     Mat         B                 = aij->B;
8115     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8116     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8117     MatScalar  *ba;
8118     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8119      * cannot use "#if defined" inside a macro. */
8120     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8121 
8122     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8123     PetscInt   nonew = a->nonew;
8124     MatScalar *ap1, *ap2;
8125 
8126     PetscFunctionBegin;
8127     PetscCall(MatSeqAIJGetArray(A, &aa));
8128     PetscCall(MatSeqAIJGetArray(B, &ba));
8129     for (i = 0; i < m; i++) {
8130       if (im[i] < 0) continue;
8131       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8132       if (im[i] >= rstart && im[i] < rend) {
8133         row      = im[i] - rstart;
8134         lastcol1 = -1;
8135         rp1      = aj + ai[row];
8136         ap1      = aa + ai[row];
8137         rmax1    = aimax[row];
8138         nrow1    = ailen[row];
8139         low1     = 0;
8140         high1    = nrow1;
8141         lastcol2 = -1;
8142         rp2      = bj + bi[row];
8143         ap2      = ba + bi[row];
8144         rmax2    = bimax[row];
8145         nrow2    = bilen[row];
8146         low2     = 0;
8147         high2    = nrow2;
8148 
8149         for (j = 0; j < n; j++) {
8150           if (roworiented) value = v[i * n + j];
8151           else value = v[i + j * m];
8152           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8153           if (in[j] >= cstart && in[j] < cend) {
8154             col = in[j] - cstart;
8155             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8156           } else if (in[j] < 0) continue;
8157           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8158             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8159           } else {
8160             if (mat->was_assembled) {
8161               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8162 #if defined(PETSC_USE_CTABLE)
8163               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8164               col--;
8165 #else
8166               col = aij->colmap[in[j]] - 1;
8167 #endif
8168               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8169                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8170                 col = in[j];
8171                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8172                 B        = aij->B;
8173                 b        = (Mat_SeqAIJ *)B->data;
8174                 bimax    = b->imax;
8175                 bi       = b->i;
8176                 bilen    = b->ilen;
8177                 bj       = b->j;
8178                 rp2      = bj + bi[row];
8179                 ap2      = ba + bi[row];
8180                 rmax2    = bimax[row];
8181                 nrow2    = bilen[row];
8182                 low2     = 0;
8183                 high2    = nrow2;
8184                 bm       = aij->B->rmap->n;
8185                 ba       = b->a;
8186                 inserted = PETSC_FALSE;
8187               }
8188             } else col = in[j];
8189             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8190           }
8191         }
8192       } else if (!aij->donotstash) {
8193         if (roworiented) {
8194           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8195         } else {
8196           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8197         }
8198       }
8199     }
8200     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8201     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8202   }
8203   PetscFunctionReturnVoid();
8204 }
8205 
8206 /* Undefining these here since they were redefined from their original definition above! No
8207  * other PETSc functions should be defined past this point, as it is impossible to recover the
8208  * original definitions */
8209 #undef PetscCall
8210 #undef SETERRQ
8211