1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because they are used to determine
43      the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140    for communicators controlling multiple processes.  It is recommended that you call both of
141    the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
150   automatically switches over to use inodes when enough of them exist.
151 
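  Example Usage:
  A minimal creation sketch. The global size and the per-row nonzero estimates (5 diagonal,
  2 off-diagonal) below are illustrative assumptions, not recommendations; calling both
  preallocation routines, as suggested above, keeps the code correct on any communicator size:
.vb
  Mat      A;
  PetscInt N = 100; // illustrative global size

  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N);
  MatSetType(A, MATAIJ);
  MatSeqAIJSetPreallocation(A, 5, NULL);          // used when the communicator has one process
  MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL); // used when it has more than one
  // ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve
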
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161    for communicators controlling multiple processes.  It is recommended that you call both of
162    the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
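  Example Usage:
  The type is typically selected at runtime; a sketch assuming the application calls
  `MatSetFromOptions()` on the matrix (the program name is hypothetical):
.vb
  ./myapp -mat_type aijcrl
.ve
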
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309   PetscMPIInt        in;
310 
311   PetscFunctionBegin;
312   PetscCall(MatGetSize(A, &m, &n));
313   PetscCall(PetscCalloc1(n, &work));
314   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
316   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
317   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
318   if (type == NORM_2) {
319     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
320     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
321   } else if (type == NORM_1) {
322     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
323     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
324   } else if (type == NORM_INFINITY) {
325     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
326     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
327   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
328     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
329     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
330   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
331     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
332     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
333   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
334   PetscCall(PetscMPIIntCast(n, &in));
335   if (type == NORM_INFINITY) {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
337   } else {
338     PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
339   }
340   PetscCall(PetscFree(work));
341   if (type == NORM_2) {
342     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
343   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
344     for (i = 0; i < n; i++) reductions[i] /= m;
345   }
346   PetscFunctionReturn(PETSC_SUCCESS);
347 }
348 
349 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
350 {
351   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
352   IS              sis, gis;
353   const PetscInt *isis, *igis;
354   PetscInt        n, *iis, nsis, ngis, rstart, i;
355 
356   PetscFunctionBegin;
357   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
358   PetscCall(MatFindNonzeroRows(a->B, &gis));
359   PetscCall(ISGetSize(gis, &ngis));
360   PetscCall(ISGetSize(sis, &nsis));
361   PetscCall(ISGetIndices(sis, &isis));
362   PetscCall(ISGetIndices(gis, &igis));
363 
364   PetscCall(PetscMalloc1(ngis + nsis, &iis));
365   PetscCall(PetscArraycpy(iis, igis, ngis));
366   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
367   n = ngis + nsis;
368   PetscCall(PetscSortRemoveDupsInt(&n, iis));
369   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
370   for (i = 0; i < n; i++) iis[i] += rstart;
371   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
372 
373   PetscCall(ISRestoreIndices(sis, &isis));
374   PetscCall(ISRestoreIndices(gis, &igis));
375   PetscCall(ISDestroy(&sis));
376   PetscCall(ISDestroy(&gis));
377   PetscFunctionReturn(PETSC_SUCCESS);
378 }
379 
380 /*
381   Local utility routine that creates a mapping from the global column
382   number to the local number in the off-diagonal part of the local
383   storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
384   at a slightly higher hash-table lookup cost; without it, it is not scalable
385   (each process stores an order-N integer array) but it is fast to access.
386 */
387 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
388 {
389   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
390   PetscInt    n   = aij->B->cmap->n, i;
391 
392   PetscFunctionBegin;
393   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
394 #if defined(PETSC_USE_CTABLE)
395   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
396   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
397 #else
398   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
399   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
400 #endif
401   PetscFunctionReturn(PETSC_SUCCESS);
402 }
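/*
   For reference, a lookup sketch that mirrors how colmap is consumed elsewhere in this file
   (for example in MatSetValues_MPIAIJ() below). Entries are stored shifted by one so that a
   lookup result of 0 (hence local column -1) can mean "this global column is not present in
   the off-diagonal part"; gcol below is a hypothetical global column index:

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) { ... gcol is not a column of the off-diagonal block B ... }
*/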
403 
404 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
405   do { \
406     if (col <= lastcol1) low1 = 0; \
407     else high1 = nrow1; \
408     lastcol1 = col; \
409     while (high1 - low1 > 5) { \
410       t = (low1 + high1) / 2; \
411       if (rp1[t] > col) high1 = t; \
412       else low1 = t; \
413     } \
414     for (_i = low1; _i < high1; _i++) { \
415       if (rp1[_i] > col) break; \
416       if (rp1[_i] == col) { \
417         if (addv == ADD_VALUES) { \
418           ap1[_i] += value; \
419           /* Not sure whether LogFlops will slow down the code or not */ \
420           (void)PetscLogFlops(1.0); \
421         } else ap1[_i] = value; \
422         goto a_noinsert; \
423       } \
424     } \
425     if (value == 0.0 && ignorezeroentries && row != col) { \
426       low1  = 0; \
427       high1 = nrow1; \
428       goto a_noinsert; \
429     } \
430     if (nonew == 1) { \
431       low1  = 0; \
432       high1 = nrow1; \
433       goto a_noinsert; \
434     } \
435     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
436     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
437     N = nrow1++ - 1; \
438     a->nz++; \
439     high1++; \
440     /* shift up all the later entries in this row */ \
441     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
442     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
443     rp1[_i] = col; \
444     ap1[_i] = value; \
445   a_noinsert:; \
446     ailen[row] = nrow1; \
447   } while (0)
448 
449 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
450   do { \
451     if (col <= lastcol2) low2 = 0; \
452     else high2 = nrow2; \
453     lastcol2 = col; \
454     while (high2 - low2 > 5) { \
455       t = (low2 + high2) / 2; \
456       if (rp2[t] > col) high2 = t; \
457       else low2 = t; \
458     } \
459     for (_i = low2; _i < high2; _i++) { \
460       if (rp2[_i] > col) break; \
461       if (rp2[_i] == col) { \
462         if (addv == ADD_VALUES) { \
463           ap2[_i] += value; \
464           (void)PetscLogFlops(1.0); \
465         } else ap2[_i] = value; \
466         goto b_noinsert; \
467       } \
468     } \
469     if (value == 0.0 && ignorezeroentries) { \
470       low2  = 0; \
471       high2 = nrow2; \
472       goto b_noinsert; \
473     } \
474     if (nonew == 1) { \
475       low2  = 0; \
476       high2 = nrow2; \
477       goto b_noinsert; \
478     } \
479     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
480     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
481     N = nrow2++ - 1; \
482     b->nz++; \
483     high2++; \
484     /* shift up all the later entries in this row */ \
485     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
486     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
487     rp2[_i] = col; \
488     ap2[_i] = value; \
489   b_noinsert:; \
490     bilen[row] = nrow2; \
491   } while (0)
492 
493 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
494 {
495   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
496   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
497   PetscInt     l, *garray                         = mat->garray, diag;
498   PetscScalar *aa, *ba;
499 
500   PetscFunctionBegin;
501   /* this code only works for square matrices A */
502 
503   /* find the number of off-diagonal entries in this row that lie to the left of the diagonal block;
         diag holds this process's first global row (which equals the first diagonal column since A is square) */
504   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
505   row = row - diag;
506   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
507     if (garray[b->j[b->i[row] + l]] > diag) break;
508   }
509   if (l) {
510     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
511     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
512     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
513   }
514 
515   /* diagonal part */
516   if (a->i[row + 1] - a->i[row]) {
517     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
518     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
519     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
520   }
521 
522   /* right of diagonal part */
523   if (b->i[row + 1] - b->i[row] - l) {
524     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
525     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
526     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
527   }
528   PetscFunctionReturn(PETSC_SUCCESS);
529 }
530 
531 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
532 {
533   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
534   PetscScalar value = 0.0;
535   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
536   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
537   PetscBool   roworiented = aij->roworiented;
538 
539   /* Some Variables required in the macro */
540   Mat         A     = aij->A;
541   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
542   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
543   PetscBool   ignorezeroentries = a->ignorezeroentries;
544   Mat         B                 = aij->B;
545   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
546   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
547   MatScalar  *aa, *ba;
548   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
549   PetscInt    nonew;
550   MatScalar  *ap1, *ap2;
551 
552   PetscFunctionBegin;
553   PetscCall(MatSeqAIJGetArray(A, &aa));
554   PetscCall(MatSeqAIJGetArray(B, &ba));
555   for (i = 0; i < m; i++) {
556     if (im[i] < 0) continue;
557     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
558     if (im[i] >= rstart && im[i] < rend) {
559       row      = im[i] - rstart;
560       lastcol1 = -1;
561       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
562       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
563       rmax1    = aimax[row];
564       nrow1    = ailen[row];
565       low1     = 0;
566       high1    = nrow1;
567       lastcol2 = -1;
568       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
569       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
570       rmax2    = bimax[row];
571       nrow2    = bilen[row];
572       low2     = 0;
573       high2    = nrow2;
574 
575       for (j = 0; j < n; j++) {
576         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
577         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
578         if (in[j] >= cstart && in[j] < cend) {
579           col   = in[j] - cstart;
580           nonew = a->nonew;
581           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
582         } else if (in[j] < 0) {
583           continue;
584         } else {
585           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
586           if (mat->was_assembled) {
587             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
588 #if defined(PETSC_USE_CTABLE)
589             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
590             col--;
591 #else
592             col = aij->colmap[in[j]] - 1;
593 #endif
594             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
595               PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
596               col = in[j];
597               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
598               B     = aij->B;
599               b     = (Mat_SeqAIJ *)B->data;
600               bimax = b->imax;
601               bi    = b->i;
602               bilen = b->ilen;
603               bj    = b->j;
604               ba    = b->a;
605               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
606               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
607               rmax2 = bimax[row];
608               nrow2 = bilen[row];
609               low2  = 0;
610               high2 = nrow2;
611               bm    = aij->B->rmap->n;
612               ba    = b->a;
613             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
614               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
615                 PetscCall(PetscInfo(mat, "Skipping insertion of a new nonzero location in the off-diagonal portion of the matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
616               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
617             }
618           } else col = in[j];
619           nonew = b->nonew;
620           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
621         }
622       }
623     } else {
624       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
625       if (!aij->donotstash) {
626         mat->assembled = PETSC_FALSE;
627         if (roworiented) {
628           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         } else {
630           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
631         }
632       }
633     }
634   }
635   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
636   PetscCall(MatSeqAIJRestoreArray(B, &ba));
637   PetscFunctionReturn(PETSC_SUCCESS);
638 }
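/*
   Caller-side sketch of the paths handled above: row and column indices are global, and values
   destined for rows owned by another process are routed through the stash and communicated during
   assembly. The indices and values are illustrative only:

     PetscInt    row     = 42;           // global row index, possibly owned by another rank
     PetscInt    cols[3] = {40, 41, 42}; // global column indices
     PetscScalar vals[3] = {1.0, 2.0, 3.0};

     PetscCall(MatSetValues(mat, 1, &row, 3, cols, vals, ADD_VALUES));
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); // ships stashed off-process entries
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/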
639 
640 /*
641     This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
642     The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
643     No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
644 */
645 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
646 {
647   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
648   Mat         A      = aij->A; /* diagonal part of the matrix */
649   Mat         B      = aij->B; /* off-diagonal part of the matrix */
650   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
651   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
652   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
653   PetscInt   *ailen = a->ilen, *aj = a->j;
654   PetscInt   *bilen = b->ilen, *bj = b->j;
655   PetscInt    am          = aij->A->rmap->n, j;
656   PetscInt    diag_so_far = 0, dnz;
657   PetscInt    offd_so_far = 0, onz;
658 
659   PetscFunctionBegin;
660   /* Iterate over all rows of the matrix */
661   for (j = 0; j < am; j++) {
662     dnz = onz = 0;
663     /*  Iterate over all non-zero columns of the current row */
664     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
665       /* If column is in the diagonal */
666       if (mat_j[col] >= cstart && mat_j[col] < cend) {
667         aj[diag_so_far++] = mat_j[col] - cstart;
668         dnz++;
669       } else { /* off-diagonal entries */
670         bj[offd_so_far++] = mat_j[col];
671         onz++;
672       }
673     }
674     ailen[j] = dnz;
675     bilen[j] = onz;
676   }
677   PetscFunctionReturn(PETSC_SUCCESS);
678 }
679 
680 /*
681     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
682     The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
683     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
684     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
685     would not be correct and the more complex MatSetValues_MPIAIJ has to be used. (A worked illustration follows this function.)
686 */
687 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
688 {
689   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
690   Mat          A    = aij->A; /* diagonal part of the matrix */
691   Mat          B    = aij->B; /* off-diagonal part of the matrix */
692   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
693   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
694   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
695   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
696   PetscInt    *ailen = a->ilen, *aj = a->j;
697   PetscInt    *bilen = b->ilen, *bj = b->j;
698   PetscInt     am          = aij->A->rmap->n, j;
699   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
700   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
701   PetscScalar *aa = a->a, *ba = b->a;
702 
703   PetscFunctionBegin;
704   /* Iterate over all rows of the matrix */
705   for (j = 0; j < am; j++) {
706     dnz_row = onz_row = 0;
707     rowstart_offd     = full_offd_i[j];
708     rowstart_diag     = full_diag_i[j];
709     /*  Iterate over all non-zero columns of the current row */
710     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
711       /* If column is in the diagonal */
712       if (mat_j[col] >= cstart && mat_j[col] < cend) {
713         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
714         aa[rowstart_diag + dnz_row] = mat_a[col];
715         dnz_row++;
716       } else { /* off-diagonal entries */
717         bj[rowstart_offd + onz_row] = mat_j[col];
718         ba[rowstart_offd + onz_row] = mat_a[col];
719         onz_row++;
720       }
721     }
722     ailen[j] = dnz_row;
723     bilen[j] = onz_row;
724   }
725   PetscFunctionReturn(PETSC_SUCCESS);
726 }
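/*
   A small (hypothetical) illustration of the CSR input the two routines above expect. Suppose this
   process owns columns [cstart, cend) = [4, 8) and its two local rows are described by the sorted
   triple

     mat_i = {0, 3, 5}
     mat_j = {2, 4, 9,  5, 7}
     mat_a = {a, b, c,  d, e}

   Row 0 then contributes columns 2 and 9 (values a and c) to the off-diagonal part B, and local
   column 4 - cstart = 0 (value b) to the diagonal part A; row 1 falls entirely within the diagonal
   part, at local columns 1 and 3.
*/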
727 
728 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
729 {
730   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
731   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
732   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
733 
734   PetscFunctionBegin;
735   for (i = 0; i < m; i++) {
736     if (idxm[i] < 0) continue; /* negative row */
737     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
738     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
739     row = idxm[i] - rstart;
740     for (j = 0; j < n; j++) {
741       if (idxn[j] < 0) continue; /* negative column */
742       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
743       if (idxn[j] >= cstart && idxn[j] < cend) {
744         col = idxn[j] - cstart;
745         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
746       } else {
747         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
748 #if defined(PETSC_USE_CTABLE)
749         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
750         col--;
751 #else
752         col = aij->colmap[idxn[j]] - 1;
753 #endif
754         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
755         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
756       }
757     }
758   }
759   PetscFunctionReturn(PETSC_SUCCESS);
760 }
761 
762 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
763 {
764   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
765   PetscInt    nstash, reallocs;
766 
767   PetscFunctionBegin;
768   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
769 
770   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
771   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
772   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
773   PetscFunctionReturn(PETSC_SUCCESS);
774 }
775 
776 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
777 {
778   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
779   PetscMPIInt  n;
780   PetscInt     i, j, rstart, ncols, flg;
781   PetscInt    *row, *col;
782   PetscBool    other_disassembled;
783   PetscScalar *val;
784 
785   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
786 
787   PetscFunctionBegin;
788   if (!aij->donotstash && !mat->nooffprocentries) {
789     while (1) {
790       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
791       if (!flg) break;
792 
793       for (i = 0; i < n;) {
794         /* Now identify the consecutive vals belonging to the same row */
795         for (j = i, rstart = row[j]; j < n; j++) {
796           if (row[j] != rstart) break;
797         }
798         if (j < n) ncols = j - i;
799         else ncols = n - i;
800         /* Now assemble all these values with a single function call */
801         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
802         i = j;
803       }
804     }
805     PetscCall(MatStashScatterEnd_Private(&mat->stash));
806   }
807 #if defined(PETSC_HAVE_DEVICE)
808   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
809   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
810   if (mat->boundtocpu) {
811     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
812     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
813   }
814 #endif
815   PetscCall(MatAssemblyBegin(aij->A, mode));
816   PetscCall(MatAssemblyEnd(aij->A, mode));
817 
818   /* Determine if any processor has disassembled; if so, we must
819      also disassemble ourselves, in order that we may reassemble. */
820   /*
821      If the nonzero structure of the submatrix B cannot change, then no processor
822      can have disassembled, so we can skip this step.
823   */
824   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
825     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
826     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has a reduced off-diag B with local col ids, but some other rank has disassembled, so we must disassemble too */
827       PetscCall(MatDisAssemble_MPIAIJ(mat));
828     }
829   }
830   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
831   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
832 #if defined(PETSC_HAVE_DEVICE)
833   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
834 #endif
835   PetscCall(MatAssemblyBegin(aij->B, mode));
836   PetscCall(MatAssemblyEnd(aij->B, mode));
837 
838   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
839 
840   aij->rowvalues = NULL;
841 
842   PetscCall(VecDestroy(&aij->diag));
843 
844   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
845   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
846     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
847     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
848   }
849 #if defined(PETSC_HAVE_DEVICE)
850   mat->offloadmask = PETSC_OFFLOAD_BOTH;
851 #endif
852   PetscFunctionReturn(PETSC_SUCCESS);
853 }
854 
855 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
856 {
857   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
858 
859   PetscFunctionBegin;
860   PetscCall(MatZeroEntries(l->A));
861   PetscCall(MatZeroEntries(l->B));
862   PetscFunctionReturn(PETSC_SUCCESS);
863 }
864 
865 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
866 {
867   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
868   PetscInt   *lrows;
869   PetscInt    r, len;
870   PetscBool   cong;
871 
872   PetscFunctionBegin;
873   /* get locally owned rows */
874   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
875   PetscCall(MatHasCongruentLayouts(A, &cong));
876   /* fix right-hand side if needed */
877   if (x && b) {
878     const PetscScalar *xx;
879     PetscScalar       *bb;
880 
881     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
882     PetscCall(VecGetArrayRead(x, &xx));
883     PetscCall(VecGetArray(b, &bb));
884     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
885     PetscCall(VecRestoreArrayRead(x, &xx));
886     PetscCall(VecRestoreArray(b, &bb));
887   }
888 
889   if (diag != 0.0 && cong) {
890     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
891     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
892   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
893     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
894     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
895     PetscInt    nnwA, nnwB;
896     PetscBool   nnzA, nnzB;
897 
898     nnwA = aijA->nonew;
899     nnwB = aijB->nonew;
900     nnzA = aijA->keepnonzeropattern;
901     nnzB = aijB->keepnonzeropattern;
902     if (!nnzA) {
903       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
904       aijA->nonew = 0;
905     }
906     if (!nnzB) {
907       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
908       aijB->nonew = 0;
909     }
910     /* Must zero here before the next loop */
911     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
912     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
913     for (r = 0; r < len; ++r) {
914       const PetscInt row = lrows[r] + A->rmap->rstart;
915       if (row >= A->cmap->N) continue;
916       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
917     }
918     aijA->nonew = nnwA;
919     aijB->nonew = nnwB;
920   } else {
921     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
922     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
923   }
924   PetscCall(PetscFree(lrows));
925   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
926   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
927 
928   /* only change matrix nonzero state if pattern was allowed to be changed */
929   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
930     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
931     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
932   }
933   PetscFunctionReturn(PETSC_SUCCESS);
934 }
935 
936 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
937 {
938   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
939   PetscInt           n = A->rmap->n;
940   PetscInt           i, j, r, m, len = 0;
941   PetscInt          *lrows, *owners = A->rmap->range;
942   PetscMPIInt        p = 0;
943   PetscSFNode       *rrows;
944   PetscSF            sf;
945   const PetscScalar *xx;
946   PetscScalar       *bb, *mask, *aij_a;
947   Vec                xmask, lmask;
948   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
949   const PetscInt    *aj, *ii, *ridx;
950   PetscScalar       *aa;
951 
952   PetscFunctionBegin;
953   /* Create SF where leaves are input rows and roots are owned rows */
954   PetscCall(PetscMalloc1(n, &lrows));
955   for (r = 0; r < n; ++r) lrows[r] = -1;
956   PetscCall(PetscMalloc1(N, &rrows));
957   for (r = 0; r < N; ++r) {
958     const PetscInt idx = rows[r];
959     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
960     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
961       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
962     }
963     rrows[r].rank  = p;
964     rrows[r].index = rows[r] - owners[p];
965   }
966   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
967   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
968   /* Collect flags for rows to be zeroed */
969   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
970   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
971   PetscCall(PetscSFDestroy(&sf));
972   /* Compress and put in row numbers */
973   for (r = 0; r < n; ++r)
974     if (lrows[r] >= 0) lrows[len++] = r;
975   /* zero diagonal part of matrix */
976   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
977   /* handle off-diagonal part of matrix */
978   PetscCall(MatCreateVecs(A, &xmask, NULL));
979   PetscCall(VecDuplicate(l->lvec, &lmask));
980   PetscCall(VecGetArray(xmask, &bb));
981   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
982   PetscCall(VecRestoreArray(xmask, &bb));
983   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
984   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
985   PetscCall(VecDestroy(&xmask));
986   if (x && b) { /* this code is buggy when the row and column layouts don't match */
987     PetscBool cong;
988 
989     PetscCall(MatHasCongruentLayouts(A, &cong));
990     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
991     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
992     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
993     PetscCall(VecGetArrayRead(l->lvec, &xx));
994     PetscCall(VecGetArray(b, &bb));
995   }
996   PetscCall(VecGetArray(lmask, &mask));
997   /* remove zeroed rows of off-diagonal matrix */
998   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
999   ii = aij->i;
1000   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
1001   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1002   if (aij->compressedrow.use) {
1003     m    = aij->compressedrow.nrows;
1004     ii   = aij->compressedrow.i;
1005     ridx = aij->compressedrow.rindex;
1006     for (i = 0; i < m; i++) {
1007       n  = ii[i + 1] - ii[i];
1008       aj = aij->j + ii[i];
1009       aa = aij_a + ii[i];
1010 
1011       for (j = 0; j < n; j++) {
1012         if (PetscAbsScalar(mask[*aj])) {
1013           if (b) bb[*ridx] -= *aa * xx[*aj];
1014           *aa = 0.0;
1015         }
1016         aa++;
1017         aj++;
1018       }
1019       ridx++;
1020     }
1021   } else { /* do not use compressed row format */
1022     m = l->B->rmap->n;
1023     for (i = 0; i < m; i++) {
1024       n  = ii[i + 1] - ii[i];
1025       aj = aij->j + ii[i];
1026       aa = aij_a + ii[i];
1027       for (j = 0; j < n; j++) {
1028         if (PetscAbsScalar(mask[*aj])) {
1029           if (b) bb[i] -= *aa * xx[*aj];
1030           *aa = 0.0;
1031         }
1032         aa++;
1033         aj++;
1034       }
1035     }
1036   }
1037   if (x && b) {
1038     PetscCall(VecRestoreArray(b, &bb));
1039     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1040   }
1041   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1042   PetscCall(VecRestoreArray(lmask, &mask));
1043   PetscCall(VecDestroy(&lmask));
1044   PetscCall(PetscFree(lrows));
1045 
1046   /* only change matrix nonzero state if pattern was allowed to be changed */
1047   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1048     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1049     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1050   }
1051   PetscFunctionReturn(PETSC_SUCCESS);
1052 }
1053 
1054 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1055 {
1056   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1057   PetscInt    nt;
1058   VecScatter  Mvctx = a->Mvctx;
1059 
1060   PetscFunctionBegin;
1061   PetscCall(VecGetLocalSize(xx, &nt));
1062   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1063   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->A, mult, xx, yy);
1065   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1066   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1067   PetscFunctionReturn(PETSC_SUCCESS);
1068 }
1069 
1070 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1071 {
1072   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1073 
1074   PetscFunctionBegin;
1075   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1076   PetscFunctionReturn(PETSC_SUCCESS);
1077 }
1078 
1079 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1080 {
1081   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1082   VecScatter  Mvctx = a->Mvctx;
1083 
1084   PetscFunctionBegin;
1085   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1087   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1088   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1089   PetscFunctionReturn(PETSC_SUCCESS);
1090 }
1091 
1092 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1093 {
1094   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1095 
1096   PetscFunctionBegin;
1097   /* do nondiagonal part */
1098   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1099   /* do local part */
1100   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1101   /* add partial results together */
1102   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1103   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1104   PetscFunctionReturn(PETSC_SUCCESS);
1105 }
1106 
1107 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1108 {
1109   MPI_Comm    comm;
1110   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1111   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1112   IS          Me, Notme;
1113   PetscInt    M, N, first, last, *notme, i;
1114   PetscBool   lf;
1115   PetscMPIInt size;
1116 
1117   PetscFunctionBegin;
1118   /* Easy test: is the diagonal block of Bmat the transpose of the diagonal block of Amat? */
1119   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1120   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1121   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1122   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1123   PetscCallMPI(MPI_Comm_size(comm, &size));
1124   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1125 
1126   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1127   PetscCall(MatGetSize(Amat, &M, &N));
1128   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1129   PetscCall(PetscMalloc1(N - last + first, &notme));
1130   for (i = 0; i < first; i++) notme[i] = i;
1131   for (i = last; i < M; i++) notme[i - last + first] = i;
1132   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1133   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1134   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1135   Aoff = Aoffs[0];
1136   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1137   Boff = Boffs[0];
1138   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1139   PetscCall(MatDestroyMatrices(1, &Aoffs));
1140   PetscCall(MatDestroyMatrices(1, &Boffs));
1141   PetscCall(ISDestroy(&Me));
1142   PetscCall(ISDestroy(&Notme));
1143   PetscCall(PetscFree(notme));
1144   PetscFunctionReturn(PETSC_SUCCESS);
1145 }
1146 
1147 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1148 {
1149   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1150 
1151   PetscFunctionBegin;
1152   /* do nondiagonal part */
1153   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1154   /* do local part */
1155   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1156   /* add partial results together */
1157   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1158   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1159   PetscFunctionReturn(PETSC_SUCCESS);
1160 }
1161 
1162 /*
1163   This only works correctly for square matrices where the subblock A->A is the
1164    diagonal block
1165 */
1166 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1167 {
1168   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1169 
1170   PetscFunctionBegin;
1171   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1172   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1173   PetscCall(MatGetDiagonal(a->A, v));
1174   PetscFunctionReturn(PETSC_SUCCESS);
1175 }
1176 
1177 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1178 {
1179   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1180 
1181   PetscFunctionBegin;
1182   PetscCall(MatScale(a->A, aa));
1183   PetscCall(MatScale(a->B, aa));
1184   PetscFunctionReturn(PETSC_SUCCESS);
1185 }
1186 
1187 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1188 {
1189   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1190   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1191   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1192   const PetscInt    *garray = aij->garray;
1193   const PetscScalar *aa, *ba;
1194   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1195   PetscInt64         nz, hnz;
1196   PetscInt          *rowlens;
1197   PetscInt          *colidxs;
1198   PetscScalar       *matvals;
1199   PetscMPIInt        rank;
1200 
1201   PetscFunctionBegin;
1202   PetscCall(PetscViewerSetUp(viewer));
1203 
1204   M  = mat->rmap->N;
1205   N  = mat->cmap->N;
1206   m  = mat->rmap->n;
1207   rs = mat->rmap->rstart;
1208   cs = mat->cmap->rstart;
1209   nz = A->nz + B->nz;
1210 
1211   /* write matrix header */
1212   header[0] = MAT_FILE_CLASSID;
1213   header[1] = M;
1214   header[2] = N;
1215   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1216   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1217   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1218   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1219 
1220   /* fill in and store row lengths  */
1221   PetscCall(PetscMalloc1(m, &rowlens));
1222   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1223   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1224   PetscCall(PetscFree(rowlens));
1225 
1226   /* fill in and store column indices: per row, off-diagonal columns left of the diagonal block first, then the diagonal block, then the remaining off-diagonal columns, so each row's global columns come out in ascending order */
1227   PetscCall(PetscMalloc1(nz, &colidxs));
1228   for (cnt = 0, i = 0; i < m; i++) {
1229     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1230       if (garray[B->j[jb]] > cs) break;
1231       colidxs[cnt++] = garray[B->j[jb]];
1232     }
1233     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1234     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1235   }
1236   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1237   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1238   PetscCall(PetscFree(colidxs));
1239 
1240   /* fill in and store nonzero values, in the same per-row order as the column indices above */
1241   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1242   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1243   PetscCall(PetscMalloc1(nz, &matvals));
1244   for (cnt = 0, i = 0; i < m; i++) {
1245     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1246       if (garray[B->j[jb]] > cs) break;
1247       matvals[cnt++] = ba[jb];
1248     }
1249     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1250     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1251   }
1252   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1253   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1254   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1255   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1256   PetscCall(PetscFree(matvals));
1257 
1258   /* write block size option to the viewer's .info file */
1259   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1260   PetscFunctionReturn(PETSC_SUCCESS);
1261 }
1262 
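/*
   The binary stream produced above is, in order: header[4] = {MAT_FILE_CLASSID, M, N, nnz},
   the M row lengths, the nnz global column indices (each row sorted by increasing column),
   and the nnz values. A minimal round-trip sketch (the file name is illustrative, not from
   this file):

     PetscViewer v;
     Mat         B;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "A.dat", FILE_MODE_WRITE, &v));
     PetscCall(MatView(A, v)); // reaches MatView_MPIAIJ_Binary() on more than one rank
     PetscCall(PetscViewerDestroy(&v));
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "A.dat", FILE_MODE_READ, &v));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetType(B, MATMPIAIJ));
     PetscCall(MatLoad(B, v)); // reads back the format written above
     PetscCall(PetscViewerDestroy(&v));
*/
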
1263 #include <petscdraw.h>
1264 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1265 {
1266   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1267   PetscMPIInt       rank = aij->rank, size = aij->size;
1268   PetscBool         isdraw, iascii, isbinary;
1269   PetscViewer       sviewer;
1270   PetscViewerFormat format;
1271 
1272   PetscFunctionBegin;
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1274   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1275   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1276   if (iascii) {
1277     PetscCall(PetscViewerGetFormat(viewer, &format));
1278     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1279       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1280       PetscCall(PetscMalloc1(size, &nz));
1281       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1282       for (i = 0; i < size; i++) {
1283         nmax = PetscMax(nmax, nz[i]);
1284         nmin = PetscMin(nmin, nz[i]);
1285         navg += nz[i];
1286       }
1287       PetscCall(PetscFree(nz));
1288       navg = navg / size;
1289       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1290       PetscFunctionReturn(PETSC_SUCCESS);
1291     }
1292     PetscCall(PetscViewerGetFormat(viewer, &format));
1293     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1294       MatInfo   info;
1295       PetscInt *inodes = NULL;
1296 
1297       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1298       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1299       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1300       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1301       if (!inodes) {
1302         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1303                                                      info.memory));
1304       } else {
1305         PetscCall(
1306           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1307       }
1308       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1311       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1312       PetscCall(PetscViewerFlush(viewer));
1313       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1314       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1315       PetscCall(VecScatterView(aij->Mvctx, viewer));
1316       PetscFunctionReturn(PETSC_SUCCESS);
1317     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1318       PetscInt inodecount, inodelimit, *inodes;
1319       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1320       if (inodes) {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1322       } else {
1323         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1324       }
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1327       PetscFunctionReturn(PETSC_SUCCESS);
1328     }
1329   } else if (isbinary) {
1330     if (size == 1) {
1331       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1332       PetscCall(MatView(aij->A, viewer));
1333     } else {
1334       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1335     }
1336     PetscFunctionReturn(PETSC_SUCCESS);
1337   } else if (iascii && size == 1) {
1338     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1339     PetscCall(MatView(aij->A, viewer));
1340     PetscFunctionReturn(PETSC_SUCCESS);
1341   } else if (isdraw) {
1342     PetscDraw draw;
1343     PetscBool isnull;
1344     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1345     PetscCall(PetscDrawIsNull(draw, &isnull));
1346     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1347   }
1348 
1349   { /* assemble the entire matrix onto first processor */
1350     Mat A = NULL, Av;
1351     IS  isrow, iscol;
1352 
1353     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1354     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1355     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1356     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1357     /* The commented-out code below achieves the same result using MatCreateSubMatrices() instead */
1358     /*
1359     Mat *AA, A = NULL, Av;
1360     IS  isrow,iscol;
1361 
1362     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1363     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1364     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1365     if (rank == 0) {
1366        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1367        A    = AA[0];
1368        Av   = AA[0];
1369     }
1370     PetscCall(MatDestroySubMatrices(1,&AA));
1371 */
1372     PetscCall(ISDestroy(&iscol));
1373     PetscCall(ISDestroy(&isrow));
1374     /*
1375        Everyone must participate in the viewing call, since the graphics waits are
1376        synchronized across all processes that share the PetscDraw object
1377     */
1378     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1379     if (rank == 0) {
1380       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1381       PetscCall(MatView_SeqAIJ(Av, sviewer));
1382     }
1383     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1384     PetscCall(MatDestroy(&A));
1385   }
1386   PetscFunctionReturn(PETSC_SUCCESS);
1387 }
1388 
1389 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1390 {
1391   PetscBool iascii, isdraw, issocket, isbinary;
1392 
1393   PetscFunctionBegin;
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1396   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1397   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1398   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1399   PetscFunctionReturn(PETSC_SUCCESS);
1400 }
1401 
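/*
   A sketch of reaching the format-dependent branches above from user code (the matrix A
   is assumed); the same effect is available on the command line via -mat_view ::ascii_info
   or -mat_view ::load_balance:

     PetscCall(PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD, PETSC_VIEWER_ASCII_INFO));
     PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); // takes the I-node summary branch
     PetscCall(PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD));
*/
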
1402 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1403 {
1404   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1405   Vec         bb1 = NULL;
1406   PetscBool   hasop;
1407 
1408   PetscFunctionBegin;
1409   if (flag == SOR_APPLY_UPPER) {
1410     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1411     PetscFunctionReturn(PETSC_SUCCESS);
1412   }
1413 
1414   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1415 
1416   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1417     if (flag & SOR_ZERO_INITIAL_GUESS) {
1418       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1419       its--;
1420     }
1421 
1422     while (its--) {
1423       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1424       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1425 
1426       /* update rhs: bb1 = bb - B*x */
1427       PetscCall(VecScale(mat->lvec, -1.0));
1428       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1429 
1430       /* local sweep */
1431       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1432     }
1433   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1434     if (flag & SOR_ZERO_INITIAL_GUESS) {
1435       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1436       its--;
1437     }
1438     while (its--) {
1439       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1440       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1441 
1442       /* update rhs: bb1 = bb - B*x */
1443       PetscCall(VecScale(mat->lvec, -1.0));
1444       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1445 
1446       /* local sweep */
1447       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1448     }
1449   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1450     if (flag & SOR_ZERO_INITIAL_GUESS) {
1451       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1452       its--;
1453     }
1454     while (its--) {
1455       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1456       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1457 
1458       /* update rhs: bb1 = bb - B*x */
1459       PetscCall(VecScale(mat->lvec, -1.0));
1460       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1461 
1462       /* local sweep */
1463       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1464     }
1465   } else if (flag & SOR_EISENSTAT) {
1466     Vec xx1;
1467 
1468     PetscCall(VecDuplicate(bb, &xx1));
1469     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1470 
1471     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1472     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1473     if (!mat->diag) {
1474       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1475       PetscCall(MatGetDiagonal(matin, mat->diag));
1476     }
1477     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1478     if (hasop) {
1479       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1480     } else {
1481       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1482     }
1483     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1484 
1485     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1486 
1487     /* local sweep */
1488     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1489     PetscCall(VecAXPY(xx, 1.0, xx1));
1490     PetscCall(VecDestroy(&xx1));
1491   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1492 
1493   PetscCall(VecDestroy(&bb1));
1494 
1495   matin->factorerrortype = mat->A->factorerrortype;
1496   PetscFunctionReturn(PETSC_SUCCESS);
1497 }
1498 
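/*
   The routine above only provides processor-local sweeps (a block-Jacobi-like composition;
   true parallel SOR hits the SETERRQ at the end). It is normally reached through PCSOR on
   a parallel matrix; a minimal sketch (ksp, b, and x are assumed to exist, and 1.5 is an
   illustrative relaxation factor):

     PC pc;
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCSOR));
     PetscCall(PCSORSetOmega(pc, 1.5));
     PetscCall(KSPSolve(ksp, b, x)); // each preconditioner application lands in MatSOR_MPIAIJ()
*/
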
1499 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1500 {
1501   Mat             aA, aB, Aperm;
1502   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1503   PetscScalar    *aa, *ba;
1504   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1505   PetscSF         rowsf, sf;
1506   IS              parcolp = NULL;
1507   PetscBool       done;
1508 
1509   PetscFunctionBegin;
1510   PetscCall(MatGetLocalSize(A, &m, &n));
1511   PetscCall(ISGetIndices(rowp, &rwant));
1512   PetscCall(ISGetIndices(colp, &cwant));
1513   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1514 
1515   /* Invert row permutation to find out where my rows should go */
1516   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1517   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1518   PetscCall(PetscSFSetFromOptions(rowsf));
1519   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1520   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1521   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1522 
1523   /* Invert column permutation to find out where my columns should go */
1524   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1525   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1526   PetscCall(PetscSFSetFromOptions(sf));
1527   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1528   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1529   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1530   PetscCall(PetscSFDestroy(&sf));
1531 
1532   PetscCall(ISRestoreIndices(rowp, &rwant));
1533   PetscCall(ISRestoreIndices(colp, &cwant));
1534   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1535 
1536   /* Find out where my gcols should go */
1537   PetscCall(MatGetSize(aB, NULL, &ng));
1538   PetscCall(PetscMalloc1(ng, &gcdest));
1539   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1540   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1541   PetscCall(PetscSFSetFromOptions(sf));
1542   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1543   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1544   PetscCall(PetscSFDestroy(&sf));
1545 
1546   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1547   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1548   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1549   for (i = 0; i < m; i++) {
1550     PetscInt    row = rdest[i];
1551     PetscMPIInt rowner;
1552     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1553     for (j = ai[i]; j < ai[i + 1]; j++) {
1554       PetscInt    col = cdest[aj[j]];
1555       PetscMPIInt cowner;
1556       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1557       if (rowner == cowner) dnnz[i]++;
1558       else onnz[i]++;
1559     }
1560     for (j = bi[i]; j < bi[i + 1]; j++) {
1561       PetscInt    col = gcdest[bj[j]];
1562       PetscMPIInt cowner;
1563       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567   }
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1570   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1571   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1572   PetscCall(PetscSFDestroy(&rowsf));
1573 
1574   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1575   PetscCall(MatSeqAIJGetArray(aA, &aa));
1576   PetscCall(MatSeqAIJGetArray(aB, &ba));
1577   for (i = 0; i < m; i++) {
1578     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1579     PetscInt  j0, rowlen;
1580     rowlen = ai[i + 1] - ai[i];
1581     for (j0 = j = 0; j < rowlen; j0 = j) { /* the scratch arrays only hold m entries, so rows longer than m are inserted in batches */
1582       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1583       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1584     }
1585     rowlen = bi[i + 1] - bi[i];
1586     for (j0 = j = 0; j < rowlen; j0 = j) {
1587       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1588       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1589     }
1590   }
1591   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1592   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1593   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1594   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1595   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1596   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1597   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1598   PetscCall(PetscFree3(work, rdest, cdest));
1599   PetscCall(PetscFree(gcdest));
1600   if (parcolp) PetscCall(ISDestroy(&colp));
1601   *B = Aperm;
1602   PetscFunctionReturn(PETSC_SUCCESS);
1603 }
1604 
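/*
   A sketch of calling the permutation above; the index sets give, for each local row and
   column, the position it maps to. The global reversal used here is purely illustrative
   (and is its own inverse), and A is assumed square so the same IS serves both sides:

     Mat       B;
     IS        perm;
     PetscInt  rstart, rend, N, i, *ip;
     PetscCall(MatGetSize(A, &N, NULL));
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(PetscMalloc1(rend - rstart, &ip));
     for (i = rstart; i < rend; i++) ip[i - rstart] = N - 1 - i;
     PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), rend - rstart, ip, PETSC_OWN_POINTER, &perm));
     PetscCall(MatPermute(A, perm, perm, &B));
     PetscCall(ISDestroy(&perm));
*/
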
1605 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1606 {
1607   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1608 
1609   PetscFunctionBegin;
1610   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1611   if (ghosts) *ghosts = aij->garray;
1612   PetscFunctionReturn(PETSC_SUCCESS);
1613 }
1614 
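/*
   A sketch of the accessor above: the returned array is the garray of this file, i.e. the
   global indices of the off-process columns. It is owned by the matrix, so do not free it:

     PetscInt        nghost;
     const PetscInt *ghosts;
     PetscCall(MatGetGhosts(A, &nghost, &ghosts));
     for (PetscInt g = 0; g < nghost; g++) { } // ghosts[g] is a global column index
*/
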
1615 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1616 {
1617   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1618   Mat            A = mat->A, B = mat->B;
1619   PetscLogDouble isend[5], irecv[5];
1620 
1621   PetscFunctionBegin;
1622   info->block_size = 1.0;
1623   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1624 
1625   isend[0] = info->nz_used;
1626   isend[1] = info->nz_allocated;
1627   isend[2] = info->nz_unneeded;
1628   isend[3] = info->memory;
1629   isend[4] = info->mallocs;
1630 
1631   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1632 
1633   isend[0] += info->nz_used;
1634   isend[1] += info->nz_allocated;
1635   isend[2] += info->nz_unneeded;
1636   isend[3] += info->memory;
1637   isend[4] += info->mallocs;
1638   if (flag == MAT_LOCAL) {
1639     info->nz_used      = isend[0];
1640     info->nz_allocated = isend[1];
1641     info->nz_unneeded  = isend[2];
1642     info->memory       = isend[3];
1643     info->mallocs      = isend[4];
1644   } else if (flag == MAT_GLOBAL_MAX) {
1645     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1646 
1647     info->nz_used      = irecv[0];
1648     info->nz_allocated = irecv[1];
1649     info->nz_unneeded  = irecv[2];
1650     info->memory       = irecv[3];
1651     info->mallocs      = irecv[4];
1652   } else if (flag == MAT_GLOBAL_SUM) {
1653     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1654 
1655     info->nz_used      = irecv[0];
1656     info->nz_allocated = irecv[1];
1657     info->nz_unneeded  = irecv[2];
1658     info->memory       = irecv[3];
1659     info->mallocs      = irecv[4];
1660   }
1661   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1662   info->fill_ratio_needed = 0;
1663   info->factor_mallocs    = 0;
1664   PetscFunctionReturn(PETSC_SUCCESS);
1665 }
1666 
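/*
   A sketch of querying the reductions above (MatInfo fields are PetscLogDouble, hence the
   double casts; the matrix A is assumed to exist):

     MatInfo info;
     PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nz used %g, allocated %g, mallocs %g\n",
                           (double)info.nz_used, (double)info.nz_allocated, (double)info.mallocs));
*/
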
1667 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1668 {
1669   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1670 
1671   PetscFunctionBegin;
1672   switch (op) {
1673   case MAT_NEW_NONZERO_LOCATIONS:
1674   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1675   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1676   case MAT_KEEP_NONZERO_PATTERN:
1677   case MAT_NEW_NONZERO_LOCATION_ERR:
1678   case MAT_USE_INODES:
1679   case MAT_IGNORE_ZERO_ENTRIES:
1680   case MAT_FORM_EXPLICIT_TRANSPOSE:
1681     MatCheckPreallocated(A, 1);
1682     PetscCall(MatSetOption(a->A, op, flg));
1683     PetscCall(MatSetOption(a->B, op, flg));
1684     break;
1685   case MAT_ROW_ORIENTED:
1686     MatCheckPreallocated(A, 1);
1687     a->roworiented = flg;
1688 
1689     PetscCall(MatSetOption(a->A, op, flg));
1690     PetscCall(MatSetOption(a->B, op, flg));
1691     break;
1692   case MAT_FORCE_DIAGONAL_ENTRIES:
1693   case MAT_SORTED_FULL:
1694     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1695     break;
1696   case MAT_IGNORE_OFF_PROC_ENTRIES:
1697     a->donotstash = flg;
1698     break;
1699   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1700   case MAT_SPD:
1701   case MAT_SYMMETRIC:
1702   case MAT_STRUCTURALLY_SYMMETRIC:
1703   case MAT_HERMITIAN:
1704   case MAT_SYMMETRY_ETERNAL:
1705   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1706   case MAT_SPD_ETERNAL:
1707     /* if the diagonal matrix is square it inherits some of the properties above */
1708     break;
1709   case MAT_SUBMAT_SINGLEIS:
1710     A->submat_singleis = flg;
1711     break;
1712   case MAT_STRUCTURE_ONLY:
1713     /* The option is handled directly by MatSetOption() */
1714     break;
1715   default:
1716     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1717   }
1718   PetscFunctionReturn(PETSC_SUCCESS);
1719 }
1720 
1721 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1722 {
1723   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1724   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1725   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1726   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1727   PetscInt    *cmap, *idx_p;
1728 
1729   PetscFunctionBegin;
1730   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1731   mat->getrowactive = PETSC_TRUE;
1732 
1733   if (!mat->rowvalues && (idx || v)) {
1734     /*
1735         allocate enough space to hold information from the longest row.
1736     */
1737     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1738     PetscInt    max = 1, tmp;
1739     for (i = 0; i < matin->rmap->n; i++) {
1740       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1741       if (max < tmp) max = tmp;
1742     }
1743     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1744   }
1745 
1746   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1747   lrow = row - rstart;
1748 
1749   pvA = &vworkA;
1750   pcA = &cworkA;
1751   pvB = &vworkB;
1752   pcB = &cworkB;
1753   if (!v) {
1754     pvA = NULL;
1755     pvB = NULL;
1756   }
1757   if (!idx) {
1758     pcA = NULL;
1759     if (!v) pcB = NULL;
1760   }
1761   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1762   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1763   nztot = nzA + nzB;
1764 
1765   cmap = mat->garray;
1766   if (v || idx) {
1767     if (nztot) {
1768       /* Sort by increasing column numbers, assuming A and B already sorted */
1769       PetscInt imark = -1;
1770       if (v) {
1771         *v = v_p = mat->rowvalues;
1772         for (i = 0; i < nzB; i++) {
1773           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1774           else break;
1775         }
1776         imark = i;
1777         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1778         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1779       }
1780       if (idx) {
1781         *idx = idx_p = mat->rowindices;
1782         if (imark > -1) {
1783           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1784         } else {
1785           for (i = 0; i < nzB; i++) {
1786             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1787             else break;
1788           }
1789           imark = i;
1790         }
1791         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1792         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1793       }
1794     } else {
1795       if (idx) *idx = NULL;
1796       if (v) *v = NULL;
1797     }
1798   }
1799   *nz = nztot;
1800   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1801   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1802   PetscFunctionReturn(PETSC_SUCCESS);
1803 }
1804 
1805 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1806 {
1807   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1808 
1809   PetscFunctionBegin;
1810   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1811   aij->getrowactive = PETSC_FALSE;
1812   PetscFunctionReturn(PETSC_SUCCESS);
1813 }
1814 
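/*
   The public pattern that ends up in the pair above; rows must be locally owned (see the
   PetscCheck in MatGetRow_MPIAIJ) and each MatGetRow() must be matched by MatRestoreRow()
   before moving to the next row (A is assumed):

     PetscInt           rstart, rend, ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (PetscInt row = rstart; row < rend; row++) {
       PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
       // cols[] holds global indices, in increasing order thanks to the merge logic above
       PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
     }
*/
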
1815 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1816 {
1817   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1818   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1819   PetscInt         i, j, cstart = mat->cmap->rstart;
1820   PetscReal        sum = 0.0;
1821   const MatScalar *v, *amata, *bmata;
1822   PetscMPIInt      iN;
1823 
1824   PetscFunctionBegin;
1825   if (aij->size == 1) {
1826     PetscCall(MatNorm(aij->A, type, norm));
1827   } else {
1828     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1829     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1830     if (type == NORM_FROBENIUS) {
1831       v = amata;
1832       for (i = 0; i < amat->nz; i++) {
1833         sum += PetscRealPart(PetscConj(*v) * (*v));
1834         v++;
1835       }
1836       v = bmata;
1837       for (i = 0; i < bmat->nz; i++) {
1838         sum += PetscRealPart(PetscConj(*v) * (*v));
1839         v++;
1840       }
1841       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1842       *norm = PetscSqrtReal(*norm);
1843       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1844     } else if (type == NORM_1) { /* max column norm */
1845       PetscReal *tmp, *tmp2;
1846       PetscInt  *jj, *garray = aij->garray;
1847       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1848       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1849       *norm = 0.0;
1850       v     = amata;
1851       jj    = amat->j;
1852       for (j = 0; j < amat->nz; j++) {
1853         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1854         v++;
1855       }
1856       v  = bmata;
1857       jj = bmat->j;
1858       for (j = 0; j < bmat->nz; j++) {
1859         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1860         v++;
1861       }
1862       PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
1863       PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1864       for (j = 0; j < mat->cmap->N; j++) {
1865         if (tmp2[j] > *norm) *norm = tmp2[j];
1866       }
1867       PetscCall(PetscFree(tmp));
1868       PetscCall(PetscFree(tmp2));
1869       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1870     } else if (type == NORM_INFINITY) { /* max row norm */
1871       PetscReal ntemp = 0.0;
1872       for (j = 0; j < aij->A->rmap->n; j++) {
1873         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1874         sum = 0.0;
1875         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1876           sum += PetscAbsScalar(*v);
1877           v++;
1878         }
1879         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1880         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1881           sum += PetscAbsScalar(*v);
1882           v++;
1883         }
1884         if (sum > ntemp) ntemp = sum;
1885       }
1886       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1887       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1888     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1889     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1890     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1891   }
1892   PetscFunctionReturn(PETSC_SUCCESS);
1893 }
1894 
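/*
   The three supported cases above compute, with a_ij the global entries:
   NORM_FROBENIUS = sqrt(sum_ij |a_ij|^2), NORM_1 = max_j sum_i |a_ij| (largest column sum),
   and NORM_INFINITY = max_i sum_j |a_ij| (largest row sum). A sketch (A assumed):

     PetscReal nrm;
     PetscCall(MatNorm(A, NORM_FROBENIUS, &nrm)); // NORM_2 would hit the PETSC_ERR_SUP branch
*/
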
1895 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1896 {
1897   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1898   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1899   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1900   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1901   Mat              B, A_diag, *B_diag;
1902   const MatScalar *pbv, *bv;
1903 
1904   PetscFunctionBegin;
1905   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1906   ma = A->rmap->n;
1907   na = A->cmap->n;
1908   mb = a->B->rmap->n;
1909   nb = a->B->cmap->n;
1910   ai = Aloc->i;
1911   aj = Aloc->j;
1912   bi = Bloc->i;
1913   bj = Bloc->j;
1914   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1915     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1916     PetscSFNode         *oloc;
1917     PETSC_UNUSED PetscSF sf;
1918 
1919     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1920     /* compute d_nnz for preallocation */
1921     PetscCall(PetscArrayzero(d_nnz, na));
1922     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1923     /* compute local off-diagonal contributions */
1924     PetscCall(PetscArrayzero(g_nnz, nb));
1925     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1926     /* map those to global */
1927     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1928     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1929     PetscCall(PetscSFSetFromOptions(sf));
1930     PetscCall(PetscArrayzero(o_nnz, na));
1931     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1932     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1933     PetscCall(PetscSFDestroy(&sf));
1934 
1935     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1936     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1937     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1938     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1939     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1940     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1941   } else {
1942     B = *matout;
1943     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1944   }
1945 
1946   b           = (Mat_MPIAIJ *)B->data;
1947   A_diag      = a->A;
1948   B_diag      = &b->A;
1949   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1950   A_diag_ncol = A_diag->cmap->N;
1951   B_diag_ilen = sub_B_diag->ilen;
1952   B_diag_i    = sub_B_diag->i;
1953 
1954   /* Set ilen for diagonal of B */
1955   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1956 
1957   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1958   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1959   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1960   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1961 
1962   /* copy over the B part */
1963   PetscCall(PetscMalloc1(bi[mb], &cols));
1964   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1965   pbv = bv;
1966   row = A->rmap->rstart;
1967   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1968   cols_tmp = cols;
1969   for (i = 0; i < mb; i++) {
1970     ncol = bi[i + 1] - bi[i];
1971     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1972     row++;
1973     if (pbv) pbv += ncol;
1974     if (cols_tmp) cols_tmp += ncol;
1975   }
1976   PetscCall(PetscFree(cols));
1977   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1978 
1979   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1980   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1981   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1982     *matout = B;
1983   } else {
1984     PetscCall(MatHeaderMerge(A, &B));
1985   }
1986   PetscFunctionReturn(PETSC_SUCCESS);
1987 }
1988 
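/*
   A usage sketch for the transpose above: MAT_INITIAL_MATRIX creates the result, while
   MAT_REUSE_MATRIX refills a transpose obtained from an earlier call, assuming the
   nonzero pattern of A has not changed:

     Mat At;
     PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
     // ... change numerical values of A without changing its pattern ...
     PetscCall(MatTranspose(A, MAT_REUSE_MATRIX, &At));
     PetscCall(MatDestroy(&At));
*/
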
1989 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1990 {
1991   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1992   Mat         a = aij->A, b = aij->B;
1993   PetscInt    s1, s2, s3;
1994 
1995   PetscFunctionBegin;
1996   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1997   if (rr) {
1998     PetscCall(VecGetLocalSize(rr, &s1));
1999     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2000     /* Overlap communication with computation. */
2001     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002   }
2003   if (ll) {
2004     PetscCall(VecGetLocalSize(ll, &s1));
2005     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2006     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2007   }
2008   /* scale the diagonal block */
2009   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2010 
2011   if (rr) {
2012     /* Do a scatter end and then right scale the off-diagonal block */
2013     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2014     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2015   }
2016   PetscFunctionReturn(PETSC_SUCCESS);
2017 }
2018 
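/*
   The routine above forms diag(ll) * A * diag(rr), overlapping the scatter of rr with the
   scaling of the diagonal block. A sketch (A is assumed; the scaling factors are
   illustrative):

     Vec l, r;
     PetscCall(MatCreateVecs(A, &r, &l)); // r conforms to the columns, l to the rows
     PetscCall(VecSet(l, 2.0));
     PetscCall(VecSet(r, 0.5));
     PetscCall(MatDiagonalScale(A, l, r));
     PetscCall(VecDestroy(&l));
     PetscCall(VecDestroy(&r));
*/
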
2019 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2020 {
2021   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2022 
2023   PetscFunctionBegin;
2024   PetscCall(MatSetUnfactored(a->A));
2025   PetscFunctionReturn(PETSC_SUCCESS);
2026 }
2027 
2028 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2029 {
2030   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2031   Mat         a, b, c, d;
2032   PetscBool   flg;
2033 
2034   PetscFunctionBegin;
2035   a = matA->A;
2036   b = matA->B;
2037   c = matB->A;
2038   d = matB->B;
2039 
2040   PetscCall(MatEqual(a, c, &flg));
2041   if (flg) PetscCall(MatEqual(b, d, &flg));
2042   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2043   PetscFunctionReturn(PETSC_SUCCESS);
2044 }
2045 
2046 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2047 {
2048   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2049   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2050 
2051   PetscFunctionBegin;
2052   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2053   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2054     /* because of the column compression in the off-processor part of the matrix a->B,
2055        the number of columns in a->B and b->B may be different, hence we cannot call
2056        the MatCopy() directly on the two parts. If need be, we can provide a more
2057        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2058        then copying the submatrices */
2059     PetscCall(MatCopy_Basic(A, B, str));
2060   } else {
2061     PetscCall(MatCopy(a->A, b->A, str));
2062     PetscCall(MatCopy(a->B, b->B, str));
2063   }
2064   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2065   PetscFunctionReturn(PETSC_SUCCESS);
2066 }
2067 
2068 /*
2069    Computes the number of nonzeros per row needed for preallocation when X and Y
2070    have different nonzero structure.
2071 */
2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2073 {
2074   PetscInt i, j, k, nzx, nzy;
2075 
2076   PetscFunctionBegin;
2077   /* Set the number of nonzeros in the new matrix */
2078   for (i = 0; i < m; i++) {
2079     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2080     nzx    = xi[i + 1] - xi[i];
2081     nzy    = yi[i + 1] - yi[i];
2082     nnz[i] = 0;
2083     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2084       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2085       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2086       nnz[i]++;
2087     }
2088     for (; k < nzy; k++) nnz[i]++;
2089   }
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
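/*
   A worked instance of the merge count above, for a single row: X has global columns
   {0,3,5} and Y has {3,4}, so the union {0,3,4,5} yields nnz[0] == 4. The CSR fragments
   below are illustrative, not taken from any matrix in this file:

     const PetscInt xi[] = {0, 3}, xj[] = {0, 1, 2}, xltog[] = {0, 3, 5};
     const PetscInt yi[] = {0, 2}, yj[] = {0, 1}, yltog[] = {3, 4};
     PetscInt       nnz[1];
     PetscCall(MatAXPYGetPreallocation_MPIX_private(1, xi, xj, xltog, yi, yj, yltog, nnz));
     // nnz[0] is now 4
*/
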
2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2095 {
2096   PetscInt    m = Y->rmap->N;
2097   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2098   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2099 
2100   PetscFunctionBegin;
2101   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2102   PetscFunctionReturn(PETSC_SUCCESS);
2103 }
2104 
2105 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2106 {
2107   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2108 
2109   PetscFunctionBegin;
2110   if (str == SAME_NONZERO_PATTERN) {
2111     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2112     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2113   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2114     PetscCall(MatAXPY_Basic(Y, a, X, str));
2115   } else {
2116     Mat       B;
2117     PetscInt *nnz_d, *nnz_o;
2118 
2119     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2120     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2121     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2122     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2123     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2124     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2125     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2126     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2127     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2128     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2129     PetscCall(MatHeaderMerge(Y, &B));
2130     PetscCall(PetscFree(nnz_d));
2131     PetscCall(PetscFree(nnz_o));
2132   }
2133   PetscFunctionReturn(PETSC_SUCCESS);
2134 }
2135 
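/*
   A sketch of the three call paths above (Y and X are assumed to have conforming layouts;
   2.0 is an illustrative scalar):

     PetscCall(MatAXPY(Y, 2.0, X, SAME_NONZERO_PATTERN));      // fast per-block path
     PetscCall(MatAXPY(Y, 2.0, X, SUBSET_NONZERO_PATTERN));    // falls back to MatAXPY_Basic()
     PetscCall(MatAXPY(Y, 2.0, X, DIFFERENT_NONZERO_PATTERN)); // preallocates a merged Y
*/
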
2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2137 
2138 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2139 {
2140   PetscFunctionBegin;
2141   if (PetscDefined(USE_COMPLEX)) {
2142     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2143 
2144     PetscCall(MatConjugate_SeqAIJ(aij->A));
2145     PetscCall(MatConjugate_SeqAIJ(aij->B));
2146   }
2147   PetscFunctionReturn(PETSC_SUCCESS);
2148 }
2149 
2150 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2153 
2154   PetscFunctionBegin;
2155   PetscCall(MatRealPart(a->A));
2156   PetscCall(MatRealPart(a->B));
2157   PetscFunctionReturn(PETSC_SUCCESS);
2158 }
2159 
2160 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2161 {
2162   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2163 
2164   PetscFunctionBegin;
2165   PetscCall(MatImaginaryPart(a->A));
2166   PetscCall(MatImaginaryPart(a->B));
2167   PetscFunctionReturn(PETSC_SUCCESS);
2168 }
2169 
2170 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2171 {
2172   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2173   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2174   PetscScalar       *va, *vv;
2175   Vec                vB, vA;
2176   const PetscScalar *vb;
2177 
2178   PetscFunctionBegin;
2179   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2180   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2181 
2182   PetscCall(VecGetArrayWrite(vA, &va));
2183   if (idx) {
2184     for (i = 0; i < m; i++) {
2185       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2186     }
2187   }
2188 
2189   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2190   PetscCall(PetscMalloc1(m, &idxb));
2191   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2192 
2193   PetscCall(VecGetArrayWrite(v, &vv));
2194   PetscCall(VecGetArrayRead(vB, &vb));
2195   for (i = 0; i < m; i++) {
2196     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2197       vv[i] = vb[i];
2198       if (idx) idx[i] = a->garray[idxb[i]];
2199     } else {
2200       vv[i] = va[i];
2201       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2202     }
2203   }
2204   PetscCall(VecRestoreArrayWrite(v, &vv));
2205   PetscCall(VecRestoreArrayWrite(vA, &va));
2206   PetscCall(VecRestoreArrayRead(vB, &vb));
2207   PetscCall(PetscFree(idxb));
2208   PetscCall(VecDestroy(&vA));
2209   PetscCall(VecDestroy(&vB));
2210   PetscFunctionReturn(PETSC_SUCCESS);
2211 }
2212 
2213 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2214 {
2215   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2216   Vec         vB, vA;
2217 
2218   PetscFunctionBegin;
2219   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2220   PetscCall(MatGetRowSumAbs(a->A, vA));
2221   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2222   PetscCall(MatGetRowSumAbs(a->B, vB));
2223   PetscCall(VecAXPY(vA, 1.0, vB));
2224   PetscCall(VecDestroy(&vB));
2225   PetscCall(VecCopy(vA, v));
2226   PetscCall(VecDestroy(&vA));
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
2230 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2231 {
2232   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2233   PetscInt           m = A->rmap->n, n = A->cmap->n;
2234   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2235   PetscInt          *cmap = mat->garray;
2236   PetscInt          *diagIdx, *offdiagIdx;
2237   Vec                diagV, offdiagV;
2238   PetscScalar       *a, *diagA, *offdiagA;
2239   const PetscScalar *ba, *bav;
2240   PetscInt           r, j, col, ncols, *bi, *bj;
2241   Mat                B = mat->B;
2242   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2243 
2244   PetscFunctionBegin;
2245   /* When a single process holds the entire matrix and the other processes have no entries */
2246   if (A->cmap->N == n) {
2247     PetscCall(VecGetArrayWrite(v, &diagA));
2248     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2249     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2250     PetscCall(VecDestroy(&diagV));
2251     PetscCall(VecRestoreArrayWrite(v, &diagA));
2252     PetscFunctionReturn(PETSC_SUCCESS);
2253   } else if (n == 0) {
2254     if (m) {
2255       PetscCall(VecGetArrayWrite(v, &a));
2256       for (r = 0; r < m; r++) {
2257         a[r] = 0.0;
2258         if (idx) idx[r] = -1;
2259       }
2260       PetscCall(VecRestoreArrayWrite(v, &a));
2261     }
2262     PetscFunctionReturn(PETSC_SUCCESS);
2263   }
2264 
2265   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2266   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2267   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2268   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2269 
2270   /* Get offdiagIdx[] for implicit 0.0 */
2271   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2272   ba = bav;
2273   bi = b->i;
2274   bj = b->j;
2275   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2276   for (r = 0; r < m; r++) {
2277     ncols = bi[r + 1] - bi[r];
2278     if (ncols == A->cmap->N - n) { /* Brow is dense */
2279       offdiagA[r]   = *ba;
2280       offdiagIdx[r] = cmap[0];
2281     } else { /* Brow is sparse, so we already KNOW the minimum absolute value is 0.0 */
2282       offdiagA[r] = 0.0;
2283 
2284       /* Find first hole in the cmap */
2285       for (j = 0; j < ncols; j++) {
2286         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2287         if (col > j && j < cstart) {
2288           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2289           break;
2290         } else if (col > j + n && j >= cstart) {
2291           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2292           break;
2293         }
2294       }
2295       if (j == ncols && ncols < A->cmap->N - n) {
2296         /* a hole is outside compressed Bcols */
2297         if (ncols == 0) {
2298           if (cstart) {
2299             offdiagIdx[r] = 0;
2300           } else offdiagIdx[r] = cend;
2301         } else { /* ncols > 0 */
2302           offdiagIdx[r] = cmap[ncols - 1] + 1;
2303           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2304         }
2305       }
2306     }
2307 
2308     for (j = 0; j < ncols; j++) {
2309       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2310         offdiagA[r]   = *ba;
2311         offdiagIdx[r] = cmap[*bj];
2312       }
2313       ba++;
2314       bj++;
2315     }
2316   }
2317 
2318   PetscCall(VecGetArrayWrite(v, &a));
2319   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2320   for (r = 0; r < m; ++r) {
2321     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2322       a[r] = diagA[r];
2323       if (idx) idx[r] = cstart + diagIdx[r];
2324     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2325       a[r] = diagA[r];
2326       if (idx) {
2327         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2328           idx[r] = cstart + diagIdx[r];
2329         } else idx[r] = offdiagIdx[r];
2330       }
2331     } else {
2332       a[r] = offdiagA[r];
2333       if (idx) idx[r] = offdiagIdx[r];
2334     }
2335   }
2336   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2337   PetscCall(VecRestoreArrayWrite(v, &a));
2338   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2339   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2340   PetscCall(VecDestroy(&diagV));
2341   PetscCall(VecDestroy(&offdiagV));
2342   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2343   PetscFunctionReturn(PETSC_SUCCESS);
2344 }
2345 
2346 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2347 {
2348   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2349   PetscInt           m = A->rmap->n, n = A->cmap->n;
2350   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2351   PetscInt          *cmap = mat->garray;
2352   PetscInt          *diagIdx, *offdiagIdx;
2353   Vec                diagV, offdiagV;
2354   PetscScalar       *a, *diagA, *offdiagA;
2355   const PetscScalar *ba, *bav;
2356   PetscInt           r, j, col, ncols, *bi, *bj;
2357   Mat                B = mat->B;
2358   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2359 
2360   PetscFunctionBegin;
2361   /* When a single process holds the entire matrix and the other processes have no entries */
2362   if (A->cmap->N == n) {
2363     PetscCall(VecGetArrayWrite(v, &diagA));
2364     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2365     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2366     PetscCall(VecDestroy(&diagV));
2367     PetscCall(VecRestoreArrayWrite(v, &diagA));
2368     PetscFunctionReturn(PETSC_SUCCESS);
2369   } else if (n == 0) {
2370     if (m) {
2371       PetscCall(VecGetArrayWrite(v, &a));
2372       for (r = 0; r < m; r++) {
2373         a[r] = PETSC_MAX_REAL;
2374         if (idx) idx[r] = -1;
2375       }
2376       PetscCall(VecRestoreArrayWrite(v, &a));
2377     }
2378     PetscFunctionReturn(PETSC_SUCCESS);
2379   }
2380 
2381   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2382   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2383   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2384   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2385 
2386   /* Get offdiagIdx[] for implicit 0.0 */
2387   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2388   ba = bav;
2389   bi = b->i;
2390   bj = b->j;
2391   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2392   for (r = 0; r < m; r++) {
2393     ncols = bi[r + 1] - bi[r];
2394     if (ncols == A->cmap->N - n) { /* Brow is dense */
2395       offdiagA[r]   = *ba;
2396       offdiagIdx[r] = cmap[0];
2397     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower */
2398       offdiagA[r] = 0.0;
2399 
2400       /* Find first hole in the cmap */
2401       for (j = 0; j < ncols; j++) {
2402         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2403         if (col > j && j < cstart) {
2404           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2405           break;
2406         } else if (col > j + n && j >= cstart) {
2407           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2408           break;
2409         }
2410       }
2411       if (j == ncols && ncols < A->cmap->N - n) {
2412         /* a hole is outside compressed Bcols */
2413         if (ncols == 0) {
2414           if (cstart) {
2415             offdiagIdx[r] = 0;
2416           } else offdiagIdx[r] = cend;
2417         } else { /* ncols > 0 */
2418           offdiagIdx[r] = cmap[ncols - 1] + 1;
2419           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2420         }
2421       }
2422     }
2423 
2424     for (j = 0; j < ncols; j++) {
2425       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2426         offdiagA[r]   = *ba;
2427         offdiagIdx[r] = cmap[*bj];
2428       }
2429       ba++;
2430       bj++;
2431     }
2432   }
2433 
2434   PetscCall(VecGetArrayWrite(v, &a));
2435   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2436   for (r = 0; r < m; ++r) {
2437     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2438       a[r] = diagA[r];
2439       if (idx) idx[r] = cstart + diagIdx[r];
2440     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2441       a[r] = diagA[r];
2442       if (idx) {
2443         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2444           idx[r] = cstart + diagIdx[r];
2445         } else idx[r] = offdiagIdx[r];
2446       }
2447     } else {
2448       a[r] = offdiagA[r];
2449       if (idx) idx[r] = offdiagIdx[r];
2450     }
2451   }
2452   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2453   PetscCall(VecRestoreArrayWrite(v, &a));
2454   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2455   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2456   PetscCall(VecDestroy(&diagV));
2457   PetscCall(VecDestroy(&offdiagV));
2458   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2459   PetscFunctionReturn(PETSC_SUCCESS);
2460 }
2461 
2462 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2463 {
2464   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2465   PetscInt           m = A->rmap->n, n = A->cmap->n;
2466   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2467   PetscInt          *cmap = mat->garray;
2468   PetscInt          *diagIdx, *offdiagIdx;
2469   Vec                diagV, offdiagV;
2470   PetscScalar       *a, *diagA, *offdiagA;
2471   const PetscScalar *ba, *bav;
2472   PetscInt           r, j, col, ncols, *bi, *bj;
2473   Mat                B = mat->B;
2474   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2475 
2476   PetscFunctionBegin;
2477   /* When a single process holds the entire matrix and the other processes have no entries */
2478   if (A->cmap->N == n) {
2479     PetscCall(VecGetArrayWrite(v, &diagA));
2480     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2481     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2482     PetscCall(VecDestroy(&diagV));
2483     PetscCall(VecRestoreArrayWrite(v, &diagA));
2484     PetscFunctionReturn(PETSC_SUCCESS);
2485   } else if (n == 0) {
2486     if (m) {
2487       PetscCall(VecGetArrayWrite(v, &a));
2488       for (r = 0; r < m; r++) {
2489         a[r] = PETSC_MIN_REAL;
2490         if (idx) idx[r] = -1;
2491       }
2492       PetscCall(VecRestoreArrayWrite(v, &a));
2493     }
2494     PetscFunctionReturn(PETSC_SUCCESS);
2495   }
2496 
2497   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2498   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2499   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2500   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2501 
2502   /* Get offdiagIdx[] for implicit 0.0 */
2503   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2504   ba = bav;
2505   bi = b->i;
2506   bj = b->j;
2507   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2508   for (r = 0; r < m; r++) {
2509     ncols = bi[r + 1] - bi[r];
2510     if (ncols == A->cmap->N - n) { /* Brow is dense */
2511       offdiagA[r]   = *ba;
2512       offdiagIdx[r] = cmap[0];
2513     } else { /* Brow is sparse, so we already KNOW the maximum is 0.0 or higher */
2514       offdiagA[r] = 0.0;
2515 
2516       /* Find first hole in the cmap */
2517       for (j = 0; j < ncols; j++) {
2518         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2519         if (col > j && j < cstart) {
2520           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2521           break;
2522         } else if (col > j + n && j >= cstart) {
2523           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2524           break;
2525         }
2526       }
2527       if (j == ncols && ncols < A->cmap->N - n) {
2528         /* a hole is outside compressed Bcols */
2529         if (ncols == 0) {
2530           if (cstart) {
2531             offdiagIdx[r] = 0;
2532           } else offdiagIdx[r] = cend;
2533         } else { /* ncols > 0 */
2534           offdiagIdx[r] = cmap[ncols - 1] + 1;
2535           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2536         }
2537       }
2538     }
2539 
2540     for (j = 0; j < ncols; j++) {
2541       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2542         offdiagA[r]   = *ba;
2543         offdiagIdx[r] = cmap[*bj];
2544       }
2545       ba++;
2546       bj++;
2547     }
2548   }
2549 
2550   PetscCall(VecGetArrayWrite(v, &a));
2551   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2552   for (r = 0; r < m; ++r) {
2553     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2554       a[r] = diagA[r];
2555       if (idx) idx[r] = cstart + diagIdx[r];
2556     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2557       a[r] = diagA[r];
2558       if (idx) {
2559         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2560           idx[r] = cstart + diagIdx[r];
2561         } else idx[r] = offdiagIdx[r];
2562       }
2563     } else {
2564       a[r] = offdiagA[r];
2565       if (idx) idx[r] = offdiagIdx[r];
2566     }
2567   }
2568   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2569   PetscCall(VecRestoreArrayWrite(v, &a));
2570   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2571   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2572   PetscCall(VecDestroy(&diagV));
2573   PetscCall(VecDestroy(&offdiagV));
2574   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2575   PetscFunctionReturn(PETSC_SUCCESS);
2576 }
2577 
2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2579 {
2580   Mat *dummy;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2584   *newmat = *dummy;
2585   PetscCall(PetscFree(dummy));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(MatInvertBlockDiagonal(a->A, values));
2595   A->factorerrortype = a->A->factorerrortype;
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2600 {
2601   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2602 
2603   PetscFunctionBegin;
2604   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2605   PetscCall(MatSetRandom(aij->A, rctx));
2606   if (x->assembled) {
2607     PetscCall(MatSetRandom(aij->B, rctx));
2608   } else {
2609     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2610   }
2611   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2612   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2613   PetscFunctionReturn(PETSC_SUCCESS);
2614 }
2615 
2616 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2617 {
2618   PetscFunctionBegin;
2619   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2620   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2621   PetscFunctionReturn(PETSC_SUCCESS);
2622 }
2623 
2624 /*@
2625   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2626 
2627   Not Collective
2628 
2629   Input Parameter:
2630 . A - the matrix
2631 
2632   Output Parameter:
2633 . nz - the number of nonzeros
2634 
2635   Level: advanced
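
  Example Usage (a minimal sketch; assumes `A` is an assembled `MATMPIAIJ`):
.vb
  PetscCount nz;

  PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
  PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %" PetscInt64_FMT "\n", (PetscInt64)nz));
.ve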
2636 
2637 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2638 @*/
2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2640 {
2641   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2642   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2643   PetscBool   isaij;
2644 
2645   PetscFunctionBegin;
2646   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2647   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2648   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2649   PetscFunctionReturn(PETSC_SUCCESS);
2650 }
2651 
2652 /*@
2653   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2654 
2655   Collective
2656 
2657   Input Parameters:
2658 + A  - the matrix
2659 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2660 
2661   Level: advanced
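
  Example Usage (a sketch; `A` is assumed to be a `MATMPIAIJ` and `is` an index set array of
  length one, to be enlarged here by two layers; the same behavior is available via the
  -mat_increase_overlap_scalable option):
.vb
  PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
  PetscCall(MatIncreaseOverlap(A, 1, &is, 2));
.ve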
2662 
2663 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2664 @*/
2665 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2666 {
2667   PetscFunctionBegin;
2668   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2673 {
2674   PetscBool sc = PETSC_FALSE, flg;
2675 
2676   PetscFunctionBegin;
2677   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2678   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2679   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2680   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2681   PetscOptionsHeadEnd();
2682   PetscFunctionReturn(PETSC_SUCCESS);
2683 }
2684 
2685 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2686 {
2687   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2688   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2689 
2690   PetscFunctionBegin;
2691   if (!Y->preallocated) {
2692     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2693   } else if (!aij->nz) { /* It does not matter if the diagonal of Y only partially lies in maij->A; we just need an estimated preallocation. */
2694     PetscInt nonew = aij->nonew;
2695     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2696     aij->nonew = nonew;
2697   }
2698   PetscCall(MatShift_Basic(Y, a));
2699   PetscFunctionReturn(PETSC_SUCCESS);
2700 }
2701 
2702 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2703 {
2704   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2705 
2706   PetscFunctionBegin;
2707   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2708   PetscCall(MatMissingDiagonal(a->A, missing, d));
2709   if (d) {
2710     PetscInt rstart;
2711     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2712     *d += rstart;
2713   }
2714   PetscFunctionReturn(PETSC_SUCCESS);
2715 }
2716 
2717 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2718 {
2719   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2720 
2721   PetscFunctionBegin;
2722   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2723   PetscFunctionReturn(PETSC_SUCCESS);
2724 }
2725 
2726 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2727 {
2728   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2729 
2730   PetscFunctionBegin;
2731   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2732   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2733   PetscFunctionReturn(PETSC_SUCCESS);
2734 }
2735 
2736 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2737                                        MatGetRow_MPIAIJ,
2738                                        MatRestoreRow_MPIAIJ,
2739                                        MatMult_MPIAIJ,
2740                                        /* 4*/ MatMultAdd_MPIAIJ,
2741                                        MatMultTranspose_MPIAIJ,
2742                                        MatMultTransposeAdd_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        NULL,
2746                                        /*10*/ NULL,
2747                                        NULL,
2748                                        NULL,
2749                                        MatSOR_MPIAIJ,
2750                                        MatTranspose_MPIAIJ,
2751                                        /*15*/ MatGetInfo_MPIAIJ,
2752                                        MatEqual_MPIAIJ,
2753                                        MatGetDiagonal_MPIAIJ,
2754                                        MatDiagonalScale_MPIAIJ,
2755                                        MatNorm_MPIAIJ,
2756                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2757                                        MatAssemblyEnd_MPIAIJ,
2758                                        MatSetOption_MPIAIJ,
2759                                        MatZeroEntries_MPIAIJ,
2760                                        /*24*/ MatZeroRows_MPIAIJ,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        NULL,
2765                                        /*29*/ MatSetUp_MPI_Hash,
2766                                        NULL,
2767                                        NULL,
2768                                        MatGetDiagonalBlock_MPIAIJ,
2769                                        NULL,
2770                                        /*34*/ MatDuplicate_MPIAIJ,
2771                                        NULL,
2772                                        NULL,
2773                                        NULL,
2774                                        NULL,
2775                                        /*39*/ MatAXPY_MPIAIJ,
2776                                        MatCreateSubMatrices_MPIAIJ,
2777                                        MatIncreaseOverlap_MPIAIJ,
2778                                        MatGetValues_MPIAIJ,
2779                                        MatCopy_MPIAIJ,
2780                                        /*44*/ MatGetRowMax_MPIAIJ,
2781                                        MatScale_MPIAIJ,
2782                                        MatShift_MPIAIJ,
2783                                        MatDiagonalSet_MPIAIJ,
2784                                        MatZeroRowsColumns_MPIAIJ,
2785                                        /*49*/ MatSetRandom_MPIAIJ,
2786                                        MatGetRowIJ_MPIAIJ,
2787                                        MatRestoreRowIJ_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2791                                        NULL,
2792                                        MatSetUnfactored_MPIAIJ,
2793                                        MatPermute_MPIAIJ,
2794                                        NULL,
2795                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2796                                        MatDestroy_MPIAIJ,
2797                                        MatView_MPIAIJ,
2798                                        NULL,
2799                                        NULL,
2800                                        /*64*/ NULL,
2801                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2806                                        MatGetRowMinAbs_MPIAIJ,
2807                                        NULL,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        /*75*/ MatFDColoringApply_AIJ,
2812                                        MatSetFromOptions_MPIAIJ,
2813                                        NULL,
2814                                        NULL,
2815                                        MatFindZeroDiagonals_MPIAIJ,
2816                                        /*80*/ NULL,
2817                                        NULL,
2818                                        NULL,
2819                                        /*83*/ MatLoad_MPIAIJ,
2820                                        NULL,
2821                                        NULL,
2822                                        NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        /*89*/ NULL,
2826                                        NULL,
2827                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2828                                        NULL,
2829                                        NULL,
2830                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        NULL,
2834                                        MatBindToCPU_MPIAIJ,
2835                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2836                                        NULL,
2837                                        NULL,
2838                                        MatConjugate_MPIAIJ,
2839                                        NULL,
2840                                        /*104*/ MatSetValuesRow_MPIAIJ,
2841                                        MatRealPart_MPIAIJ,
2842                                        MatImaginaryPart_MPIAIJ,
2843                                        NULL,
2844                                        NULL,
2845                                        /*109*/ NULL,
2846                                        NULL,
2847                                        MatGetRowMin_MPIAIJ,
2848                                        NULL,
2849                                        MatMissingDiagonal_MPIAIJ,
2850                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2851                                        NULL,
2852                                        MatGetGhosts_MPIAIJ,
2853                                        NULL,
2854                                        NULL,
2855                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2856                                        NULL,
2857                                        NULL,
2858                                        NULL,
2859                                        MatGetMultiProcBlock_MPIAIJ,
2860                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2861                                        MatGetColumnReductions_MPIAIJ,
2862                                        MatInvertBlockDiagonal_MPIAIJ,
2863                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2864                                        MatCreateSubMatricesMPI_MPIAIJ,
2865                                        /*129*/ NULL,
2866                                        NULL,
2867                                        NULL,
2868                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2869                                        NULL,
2870                                        /*134*/ NULL,
2871                                        NULL,
2872                                        NULL,
2873                                        NULL,
2874                                        NULL,
2875                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2876                                        NULL,
2877                                        NULL,
2878                                        MatFDColoringSetUp_MPIXAIJ,
2879                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2880                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2881                                        /*145*/ NULL,
2882                                        NULL,
2883                                        NULL,
2884                                        MatCreateGraph_Simple_AIJ,
2885                                        NULL,
2886                                        /*150*/ NULL,
2887                                        MatEliminateZeros_MPIAIJ,
2888                                        MatGetRowSumAbs_MPIAIJ,
2889                                        NULL,
2890                                        NULL,
2891                                        /*155*/ NULL,
2892                                        MatCopyHashToXAIJ_MPI_Hash};
2893 
2894 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2895 {
2896   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2897 
2898   PetscFunctionBegin;
2899   PetscCall(MatStoreValues(aij->A));
2900   PetscCall(MatStoreValues(aij->B));
2901   PetscFunctionReturn(PETSC_SUCCESS);
2902 }
2903 
2904 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2905 {
2906   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2907 
2908   PetscFunctionBegin;
2909   PetscCall(MatRetrieveValues(aij->A));
2910   PetscCall(MatRetrieveValues(aij->B));
2911   PetscFunctionReturn(PETSC_SUCCESS);
2912 }
2913 
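/* A minimal sketch of the user-level calls that reach this routine (the local sizes
   m, n and the per-row estimates 5 and 2 are illustrative assumptions):

     PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
     PetscCall(MatSetSizes(B, m, n, PETSC_DECIDE, PETSC_DECIDE));
     PetscCall(MatSetType(B, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(B, 5, NULL, 2, NULL));
*/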
2914 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2915 {
2916   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2917   PetscMPIInt size;
2918 
2919   PetscFunctionBegin;
2920   if (B->hash_active) {
2921     B->ops[0]      = b->cops;
2922     B->hash_active = PETSC_FALSE;
2923   }
2924   PetscCall(PetscLayoutSetUp(B->rmap));
2925   PetscCall(PetscLayoutSetUp(B->cmap));
2926 
2927 #if defined(PETSC_USE_CTABLE)
2928   PetscCall(PetscHMapIDestroy(&b->colmap));
2929 #else
2930   PetscCall(PetscFree(b->colmap));
2931 #endif
2932   PetscCall(PetscFree(b->garray));
2933   PetscCall(VecDestroy(&b->lvec));
2934   PetscCall(VecScatterDestroy(&b->Mvctx));
2935 
2936   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2937 
2938   MatSeqXAIJGetOptions_Private(b->B);
2939   PetscCall(MatDestroy(&b->B));
2940   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2941   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2942   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2943   PetscCall(MatSetType(b->B, MATSEQAIJ));
2944   MatSeqXAIJRestoreOptions_Private(b->B);
2945 
2946   MatSeqXAIJGetOptions_Private(b->A);
2947   PetscCall(MatDestroy(&b->A));
2948   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2949   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2950   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2951   PetscCall(MatSetType(b->A, MATSEQAIJ));
2952   MatSeqXAIJRestoreOptions_Private(b->A);
2953 
2954   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2955   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2956   B->preallocated  = PETSC_TRUE;
2957   B->was_assembled = PETSC_FALSE;
2958   B->assembled     = PETSC_FALSE;
2959   PetscFunctionReturn(PETSC_SUCCESS);
2960 }
2961 
2962 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2963 {
2964   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2965 
2966   PetscFunctionBegin;
2967   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2968   PetscCall(PetscLayoutSetUp(B->rmap));
2969   PetscCall(PetscLayoutSetUp(B->cmap));
2970 
2971 #if defined(PETSC_USE_CTABLE)
2972   PetscCall(PetscHMapIDestroy(&b->colmap));
2973 #else
2974   PetscCall(PetscFree(b->colmap));
2975 #endif
2976   PetscCall(PetscFree(b->garray));
2977   PetscCall(VecDestroy(&b->lvec));
2978   PetscCall(VecScatterDestroy(&b->Mvctx));
2979 
2980   PetscCall(MatResetPreallocation(b->A));
2981   PetscCall(MatResetPreallocation(b->B));
2982   B->preallocated  = PETSC_TRUE;
2983   B->was_assembled = PETSC_FALSE;
2984   B->assembled     = PETSC_FALSE;
2985   PetscFunctionReturn(PETSC_SUCCESS);
2986 }
2987 
2988 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2989 {
2990   Mat         mat;
2991   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2992 
2993   PetscFunctionBegin;
2994   *newmat = NULL;
2995   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2996   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2997   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2998   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2999   a = (Mat_MPIAIJ *)mat->data;
3000 
3001   mat->factortype = matin->factortype;
3002   mat->assembled  = matin->assembled;
3003   mat->insertmode = NOT_SET_VALUES;
3004 
3005   a->size         = oldmat->size;
3006   a->rank         = oldmat->rank;
3007   a->donotstash   = oldmat->donotstash;
3008   a->roworiented  = oldmat->roworiented;
3009   a->rowindices   = NULL;
3010   a->rowvalues    = NULL;
3011   a->getrowactive = PETSC_FALSE;
3012 
3013   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3014   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3015   if (matin->hash_active) {
3016     PetscCall(MatSetUp(mat));
3017   } else {
3018     mat->preallocated = matin->preallocated;
3019     if (oldmat->colmap) {
3020 #if defined(PETSC_USE_CTABLE)
3021       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3022 #else
3023       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3024       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3025 #endif
3026     } else a->colmap = NULL;
3027     if (oldmat->garray) {
3028       PetscInt len;
3029       len = oldmat->B->cmap->n;
3030       PetscCall(PetscMalloc1(len + 1, &a->garray));
3031       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3032     } else a->garray = NULL;
3033 
3034     /* It may happen MatDuplicate is called with a non-assembled matrix
3035       In fact, MatDuplicate only requires the matrix to be preallocated
3036       This may happen inside a DMCreateMatrix_Shell */
3037     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3038     if (oldmat->Mvctx) {
3039       a->Mvctx = oldmat->Mvctx;
3040       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3041     }
3042     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3043     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3044   }
3045   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3046   *newmat = mat;
3047   PetscFunctionReturn(PETSC_SUCCESS);
3048 }
3049 
3050 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3051 {
3052   PetscBool isbinary, ishdf5;
3053 
3054   PetscFunctionBegin;
3055   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3056   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3057   /* force binary viewer to load .info file if it has not yet done so */
3058   PetscCall(PetscViewerSetUp(viewer));
3059   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3060   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3061   if (isbinary) {
3062     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3063   } else if (ishdf5) {
3064 #if defined(PETSC_HAVE_HDF5)
3065     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3066 #else
3067     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3068 #endif
3069   } else {
3070     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3071   }
3072   PetscFunctionReturn(PETSC_SUCCESS);
3073 }
3074 
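/* A minimal sketch of how the loader above is typically driven from user code;
   MatLoad() dispatches to MatLoad_MPIAIJ() for a MATMPIAIJ. The file name
   "matrix.dat" is an illustrative assumption (written earlier by MatView() on a
   binary viewer):

     PetscViewer viewer;
     Mat         A;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/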
3075 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3076 {
3077   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3078   PetscInt    *rowidxs, *colidxs;
3079   PetscScalar *matvals;
3080 
3081   PetscFunctionBegin;
3082   PetscCall(PetscViewerSetUp(viewer));
3083 
3084   /* read in matrix header */
3085   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3086   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3087   M  = header[1];
3088   N  = header[2];
3089   nz = header[3];
3090   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3091   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3092   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3093 
3094   /* set block sizes from the viewer's .info file */
3095   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3096   /* set global sizes if not set already */
3097   if (mat->rmap->N < 0) mat->rmap->N = M;
3098   if (mat->cmap->N < 0) mat->cmap->N = N;
3099   PetscCall(PetscLayoutSetUp(mat->rmap));
3100   PetscCall(PetscLayoutSetUp(mat->cmap));
3101 
3102   /* check if the matrix sizes are correct */
3103   PetscCall(MatGetSize(mat, &rows, &cols));
3104   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3105 
3106   /* read in row lengths and build row indices */
3107   PetscCall(MatGetLocalSize(mat, &m, NULL));
3108   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3109   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3110   rowidxs[0] = 0;
3111   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3112   if (nz != PETSC_INT_MAX) {
3113     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3114     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3115   }
3116 
3117   /* read in column indices and matrix values */
3118   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3119   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3120   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3121   /* store matrix indices and values */
3122   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3123   PetscCall(PetscFree(rowidxs));
3124   PetscCall(PetscFree2(colidxs, matvals));
3125   PetscFunctionReturn(PETSC_SUCCESS);
3126 }
3127 
3128 /* Not scalable because of ISAllGather() unless getting all columns. */
3129 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3130 {
3131   IS          iscol_local;
3132   PetscBool   isstride;
3133   PetscMPIInt lisstride = 0, gisstride;
3134 
3135   PetscFunctionBegin;
3136   /* Check if we are grabbing all columns */
3137   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3138 
3139   if (isstride) {
3140     PetscInt start, len, mstart, mlen;
3141     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3142     PetscCall(ISGetLocalSize(iscol, &len));
3143     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3144     if (mstart == start && mlen - mstart == len) lisstride = 1;
3145   }
3146 
3147   PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3148   if (gisstride) {
3149     PetscInt N;
3150     PetscCall(MatGetSize(mat, NULL, &N));
3151     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3152     PetscCall(ISSetIdentity(iscol_local));
3153     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3154   } else {
3155     PetscInt cbs;
3156     PetscCall(ISGetBlockSize(iscol, &cbs));
3157     PetscCall(ISAllGather(iscol, &iscol_local));
3158     PetscCall(ISSetBlockSize(iscol_local, cbs));
3159   }
3160 
3161   *isseq = iscol_local;
3162   PetscFunctionReturn(PETSC_SUCCESS);
3163 }
3164 
3165 /*
3166  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3167  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3168 
3169  Input Parameters:
3170 +   mat - matrix
3171 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3172            i.e., mat->rstart <= isrow[i] < mat->rend
3173 -   iscol - parallel column index set; its local indices are a subset of the local columns of `mat`,
3174            i.e., mat->cstart <= iscol[i] < mat->cend
3175 
3176  Output Parameters:
3177 +   isrow_d - sequential row index set for retrieving mat->A
3178 .   iscol_d - sequential column index set for retrieving mat->A
3179 .   iscol_o - sequential column index set for retrieving mat->B
3180 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3181  */
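/*
 For illustration (a hypothetical layout): with two ranks and 8 global columns
 split 0..3 | 4..7, take iscol = {1,3} on rank 0 and {4,6} on rank 1, so the
 selected columns get new global numbers 0..3. On rank 0, iscol_d = {1,3}
 (local indices into mat->A); if rank 0's mat->B couples to global columns 4
 and 6, iscol_o selects those local B columns and garray maps them to their
 new numbers {2,3}.
*/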
3182 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3183 {
3184   Vec             x, cmap;
3185   const PetscInt *is_idx;
3186   PetscScalar    *xarray, *cmaparray;
3187   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3188   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3189   Mat             B    = a->B;
3190   Vec             lvec = a->lvec, lcmap;
3191   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3192   MPI_Comm        comm;
3193   VecScatter      Mvctx = a->Mvctx;
3194 
3195   PetscFunctionBegin;
3196   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3197   PetscCall(ISGetLocalSize(iscol, &ncols));
3198 
3199   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3200   PetscCall(MatCreateVecs(mat, &x, NULL));
3201   PetscCall(VecSet(x, -1.0));
3202   PetscCall(VecDuplicate(x, &cmap));
3203   PetscCall(VecSet(cmap, -1.0));
3204 
3205   /* Get start indices */
3206   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3207   isstart -= ncols;
3208   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3209 
3210   PetscCall(ISGetIndices(iscol, &is_idx));
3211   PetscCall(VecGetArray(x, &xarray));
3212   PetscCall(VecGetArray(cmap, &cmaparray));
3213   PetscCall(PetscMalloc1(ncols, &idx));
3214   for (i = 0; i < ncols; i++) {
3215     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3216     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3217     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3218   }
3219   PetscCall(VecRestoreArray(x, &xarray));
3220   PetscCall(VecRestoreArray(cmap, &cmaparray));
3221   PetscCall(ISRestoreIndices(iscol, &is_idx));
3222 
3223   /* Get iscol_d */
3224   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3225   PetscCall(ISGetBlockSize(iscol, &i));
3226   PetscCall(ISSetBlockSize(*iscol_d, i));
3227 
3228   /* Get isrow_d */
3229   PetscCall(ISGetLocalSize(isrow, &m));
3230   rstart = mat->rmap->rstart;
3231   PetscCall(PetscMalloc1(m, &idx));
3232   PetscCall(ISGetIndices(isrow, &is_idx));
3233   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3234   PetscCall(ISRestoreIndices(isrow, &is_idx));
3235 
3236   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3237   PetscCall(ISGetBlockSize(isrow, &i));
3238   PetscCall(ISSetBlockSize(*isrow_d, i));
3239 
3240   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3241   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3242   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3243 
3244   PetscCall(VecDuplicate(lvec, &lcmap));
3245 
3246   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3247   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3248 
3249   /* (3) Create sequential iscol_o (a subset of iscol) and garray */
3250   /* off-process column indices */
3251   count = 0;
3252   PetscCall(PetscMalloc1(Bn, &idx));
3253   PetscCall(PetscMalloc1(Bn, &cmap1));
3254 
3255   PetscCall(VecGetArray(lvec, &xarray));
3256   PetscCall(VecGetArray(lcmap, &cmaparray));
3257   for (i = 0; i < Bn; i++) {
3258     if (PetscRealPart(xarray[i]) > -1.0) {
3259       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3260       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3261       count++;
3262     }
3263   }
3264   PetscCall(VecRestoreArray(lvec, &xarray));
3265   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3266 
3267   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3268   /* we cannot ensure iscol_o has the same block size as iscol! */
3269 
3270   PetscCall(PetscFree(idx));
3271   *garray = cmap1;
3272 
3273   PetscCall(VecDestroy(&x));
3274   PetscCall(VecDestroy(&cmap));
3275   PetscCall(VecDestroy(&lcmap));
3276   PetscFunctionReturn(PETSC_SUCCESS);
3277 }
3278 
3279 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3280 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3281 {
3282   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3283   Mat         M = NULL;
3284   MPI_Comm    comm;
3285   IS          iscol_d, isrow_d, iscol_o;
3286   Mat         Asub = NULL, Bsub = NULL;
3287   PetscInt    n;
3288 
3289   PetscFunctionBegin;
3290   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3291 
3292   if (call == MAT_REUSE_MATRIX) {
3293     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3294     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3295     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3296 
3297     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3298     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3299 
3300     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3301     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3302 
3303     /* Update diagonal and off-diagonal portions of submat */
3304     asub = (Mat_MPIAIJ *)(*submat)->data;
3305     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3306     PetscCall(ISGetLocalSize(iscol_o, &n));
3307     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3308     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3309     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3310 
3311   } else { /* call == MAT_INITIAL_MATRIX */
3312     PetscInt *garray;
3313     PetscInt  BsubN;
3314 
3315     /* Create isrow_d, iscol_d, iscol_o and garray */
3316     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3317 
3318     /* Create local submatrices Asub and Bsub */
3319     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3320     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3321 
3322     /* Create submatrix M */
3323     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3324 
3325     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3326     asub = (Mat_MPIAIJ *)M->data;
3327 
3328     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3329     n = asub->B->cmap->N;
3330     if (BsubN > n) {
3331       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3332       const PetscInt *idx;
3333       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3334       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3335 
3336       PetscCall(PetscMalloc1(n, &idx_new));
3337       j = 0;
3338       PetscCall(ISGetIndices(iscol_o, &idx));
3339       for (i = 0; i < n; i++) {
3340         if (j >= BsubN) break;
3341         while (subgarray[i] > garray[j]) j++;
3342 
3343         if (subgarray[i] == garray[j]) {
3344           idx_new[i] = idx[j++];
3345         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3346       }
3347       PetscCall(ISRestoreIndices(iscol_o, &idx));
3348 
3349       PetscCall(ISDestroy(&iscol_o));
3350       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3351 
3352     } else if (BsubN < n) {
3353       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3354     }
3355 
3356     PetscCall(PetscFree(garray));
3357     *submat = M;
3358 
3359     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3360     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3361     PetscCall(ISDestroy(&isrow_d));
3362 
3363     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3364     PetscCall(ISDestroy(&iscol_d));
3365 
3366     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3367     PetscCall(ISDestroy(&iscol_o));
3368   }
3369   PetscFunctionReturn(PETSC_SUCCESS);
3370 }
3371 
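/* A minimal usage sketch for the dispatcher below (assumes isrow and iscol live
   on the same communicator as mat; MAT_REUSE_MATRIX refreshes the values after
   mat has changed):

     Mat sub;

     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_REUSE_MATRIX, &sub));
     PetscCall(MatDestroy(&sub));
*/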
3372 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3373 {
3374   IS        iscol_local = NULL, isrow_d;
3375   PetscInt  csize;
3376   PetscInt  n, i, j, start, end;
3377   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3378   MPI_Comm  comm;
3379 
3380   PetscFunctionBegin;
3381   /* If isrow has same processor distribution as mat,
3382      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3383   if (call == MAT_REUSE_MATRIX) {
3384     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3385     if (isrow_d) {
3386       sameRowDist  = PETSC_TRUE;
3387       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3388     } else {
3389       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3390       if (iscol_local) {
3391         sameRowDist  = PETSC_TRUE;
3392         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3393       }
3394     }
3395   } else {
3396     /* Check if isrow has same processor distribution as mat */
3397     sameDist[0] = PETSC_FALSE;
3398     PetscCall(ISGetLocalSize(isrow, &n));
3399     if (!n) {
3400       sameDist[0] = PETSC_TRUE;
3401     } else {
3402       PetscCall(ISGetMinMax(isrow, &i, &j));
3403       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3404       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3405     }
3406 
3407     /* Check if iscol has same processor distribution as mat */
3408     sameDist[1] = PETSC_FALSE;
3409     PetscCall(ISGetLocalSize(iscol, &n));
3410     if (!n) {
3411       sameDist[1] = PETSC_TRUE;
3412     } else {
3413       PetscCall(ISGetMinMax(iscol, &i, &j));
3414       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3415       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3416     }
3417 
3418     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3419     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3420     sameRowDist = tsameDist[0];
3421   }
3422 
3423   if (sameRowDist) {
3424     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3425       /* isrow and iscol have same processor distribution as mat */
3426       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3427       PetscFunctionReturn(PETSC_SUCCESS);
3428     } else { /* sameRowDist */
3429       /* isrow has same processor distribution as mat */
3430       if (call == MAT_INITIAL_MATRIX) {
3431         PetscBool sorted;
3432         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3433         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3434         PetscCall(ISGetSize(iscol, &i));
3435         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3436 
3437         PetscCall(ISSorted(iscol_local, &sorted));
3438         if (sorted) {
3439           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires that iscol_local be sorted; it may contain duplicate indices */
3440           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3441           PetscFunctionReturn(PETSC_SUCCESS);
3442         }
3443       } else { /* call == MAT_REUSE_MATRIX */
3444         IS iscol_sub;
3445         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3446         if (iscol_sub) {
3447           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3448           PetscFunctionReturn(PETSC_SUCCESS);
3449         }
3450       }
3451     }
3452   }
3453 
3454   /* General case: iscol -> iscol_local which has global size of iscol */
3455   if (call == MAT_REUSE_MATRIX) {
3456     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3457     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3458   } else {
3459     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3460   }
3461 
3462   PetscCall(ISGetLocalSize(iscol, &csize));
3463   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3464 
3465   if (call == MAT_INITIAL_MATRIX) {
3466     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3467     PetscCall(ISDestroy(&iscol_local));
3468   }
3469   PetscFunctionReturn(PETSC_SUCCESS);
3470 }
3471 
3472 /*@C
3473   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3474   and "off-diagonal" part of the matrix in CSR format.
3475 
3476   Collective
3477 
3478   Input Parameters:
3479 + comm   - MPI communicator
3480 . A      - "diagonal" portion of matrix
3481 . B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3482 - garray - global index of `B` columns
3483 
3484   Output Parameter:
3485 . mat - the matrix, with input `A` as its local diagonal matrix
3486 
3487   Level: advanced
3488 
3489   Notes:
3490   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3491 
3492   `A` becomes part of the output `mat` and `B` is destroyed by this routine; the caller must not use `A` or `B` afterwards.
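
  Example Usage (a sketch; `Aloc` and `Bloc` are assumed to be freshly created `MATSEQAIJ`
  matrices laid out as described above, with `garray` holding the global indices of `Bloc`'s columns):
.vb
  Mat C;

  PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Aloc, Bloc, garray, &C));
.ve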
3493 
3494 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3495 @*/
3496 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3497 {
3498   Mat_MPIAIJ        *maij;
3499   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3500   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3501   const PetscScalar *oa;
3502   Mat                Bnew;
3503   PetscInt           m, n, N;
3504   MatType            mpi_mat_type;
3505 
3506   PetscFunctionBegin;
3507   PetscCall(MatCreate(comm, mat));
3508   PetscCall(MatGetSize(A, &m, &n));
3509   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3510   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3511   /* The check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3512   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3513 
3514   /* Get global columns of mat */
3515   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3516 
3517   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3518   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3519   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3520   PetscCall(MatSetType(*mat, mpi_mat_type));
3521 
3522   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3523   maij = (Mat_MPIAIJ *)(*mat)->data;
3524 
3525   (*mat)->preallocated = PETSC_TRUE;
3526 
3527   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3528   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3529 
3530   /* Set A as diagonal portion of *mat */
3531   maij->A = A;
3532 
3533   nz = oi[m];
3534   for (i = 0; i < nz; i++) {
3535     col   = oj[i];
3536     oj[i] = garray[col];
3537   }
3538 
3539   /* Set Bnew as off-diagonal portion of *mat */
3540   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3541   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3542   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3543   bnew        = (Mat_SeqAIJ *)Bnew->data;
3544   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3545   maij->B     = Bnew;
3546 
3547   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3548 
3549   b->free_a  = PETSC_FALSE;
3550   b->free_ij = PETSC_FALSE;
3551   PetscCall(MatDestroy(&B));
3552 
3553   bnew->free_a  = PETSC_TRUE;
3554   bnew->free_ij = PETSC_TRUE;
3555 
3556   /* condense columns of maij->B */
3557   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3558   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3559   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3560   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3561   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3562   PetscFunctionReturn(PETSC_SUCCESS);
3563 }
3564 
3565 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3566 
3567 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3568 {
3569   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3570   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3571   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3572   Mat             M, Msub, B = a->B;
3573   MatScalar      *aa;
3574   Mat_SeqAIJ     *aij;
3575   PetscInt       *garray = a->garray, *colsub, Ncols;
3576   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3577   IS              iscol_sub, iscmap;
3578   const PetscInt *is_idx, *cmap;
3579   PetscBool       allcolumns = PETSC_FALSE;
3580   MPI_Comm        comm;
3581 
3582   PetscFunctionBegin;
3583   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3584   if (call == MAT_REUSE_MATRIX) {
3585     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3586     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3587     PetscCall(ISGetLocalSize(iscol_sub, &count));
3588 
3589     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3590     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3591 
3592     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3593     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3594 
3595     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3596 
3597   } else { /* call == MAT_INITIAL_MATRIX */
3598     PetscBool flg;
3599 
3600     PetscCall(ISGetLocalSize(iscol, &n));
3601     PetscCall(ISGetSize(iscol, &Ncols));
3602 
3603     /* (1) iscol -> nonscalable iscol_local */
3604     /* Check for special case: each processor gets entire matrix columns */
3605     PetscCall(ISIdentity(iscol_local, &flg));
3606     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3607     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3608     if (allcolumns) {
3609       iscol_sub = iscol_local;
3610       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3611       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3612 
3613     } else {
3614       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires that iscol_local be sorted; it may contain duplicate indices */
3615       PetscInt *idx, *cmap1, k;
3616       PetscCall(PetscMalloc1(Ncols, &idx));
3617       PetscCall(PetscMalloc1(Ncols, &cmap1));
3618       PetscCall(ISGetIndices(iscol_local, &is_idx));
3619       count = 0;
3620       k     = 0;
3621       for (i = 0; i < Ncols; i++) {
3622         j = is_idx[i];
3623         if (j >= cstart && j < cend) {
3624           /* diagonal part of mat */
3625           idx[count]     = j;
3626           cmap1[count++] = i; /* column index in submat */
3627         } else if (Bn) {
3628           /* off-diagonal part of mat */
3629           if (j == garray[k]) {
3630             idx[count]     = j;
3631             cmap1[count++] = i; /* column index in submat */
3632           } else if (j > garray[k]) {
3633             while (j > garray[k] && k < Bn - 1) k++;
3634             if (j == garray[k]) {
3635               idx[count]     = j;
3636               cmap1[count++] = i; /* column index in submat */
3637             }
3638           }
3639         }
3640       }
3641       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3642 
3643       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3644       PetscCall(ISGetBlockSize(iscol, &cbs));
3645       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3646 
3647       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3648     }
3649 
3650     /* (3) Create sequential Msub */
3651     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3652   }
3653 
3654   PetscCall(ISGetLocalSize(iscol_sub, &count));
3655   aij = (Mat_SeqAIJ *)Msub->data;
3656   ii  = aij->i;
3657   PetscCall(ISGetIndices(iscmap, &cmap));
3658 
3659   /*
3660       m - number of local rows
3661       Ncols - number of columns (same on all processors)
3662       rstart - first row in new global matrix generated
3663   */
3664   PetscCall(MatGetSize(Msub, &m, NULL));
3665 
3666   if (call == MAT_INITIAL_MATRIX) {
3667     /* (4) Create parallel newmat */
3668     PetscMPIInt rank, size;
3669     PetscInt    csize;
3670 
3671     PetscCallMPI(MPI_Comm_size(comm, &size));
3672     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3673 
3674     /*
3675         Determine the number of non-zeros in the diagonal and off-diagonal
3676         portions of the matrix in order to do correct preallocation
3677     */
3678 
3679     /* first get start and end of "diagonal" columns */
3680     PetscCall(ISGetLocalSize(iscol, &csize));
3681     if (csize == PETSC_DECIDE) {
3682       PetscCall(ISGetSize(isrow, &mglobal));
3683       if (mglobal == Ncols) { /* square matrix */
3684         nlocal = m;
3685       } else {
3686         nlocal = Ncols / size + ((Ncols % size) > rank);
3687       }
3688     } else {
3689       nlocal = csize;
3690     }
3691     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3692     rstart = rend - nlocal;
3693     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3694 
3695     /* next, compute all the lengths */
3696     jj = aij->j;
3697     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3698     olens = dlens + m;
3699     for (i = 0; i < m; i++) {
3700       jend = ii[i + 1] - ii[i];
3701       olen = 0;
3702       dlen = 0;
3703       for (j = 0; j < jend; j++) {
3704         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3705         else dlen++;
3706         jj++;
3707       }
3708       olens[i] = olen;
3709       dlens[i] = dlen;
3710     }
3711 
3712     PetscCall(ISGetBlockSize(isrow, &bs));
3713     PetscCall(ISGetBlockSize(iscol, &cbs));
3714 
3715     PetscCall(MatCreate(comm, &M));
3716     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3717     PetscCall(MatSetBlockSizes(M, bs, cbs));
3718     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3719     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3720     PetscCall(PetscFree(dlens));
3721 
3722   } else { /* call == MAT_REUSE_MATRIX */
3723     M = *newmat;
3724     PetscCall(MatGetLocalSize(M, &i, NULL));
3725     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3726     PetscCall(MatZeroEntries(M));
3727     /*
3728          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3729        rather than the slower MatSetValues().
3730     */
3731     M->was_assembled = PETSC_TRUE;
3732     M->assembled     = PETSC_FALSE;
3733   }
3734 
3735   /* (5) Set values of Msub to *newmat */
3736   PetscCall(PetscMalloc1(count, &colsub));
3737   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3738 
3739   jj = aij->j;
3740   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3741   for (i = 0; i < m; i++) {
3742     row = rstart + i;
3743     nz  = ii[i + 1] - ii[i];
3744     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3745     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3746     jj += nz;
3747     aa += nz;
3748   }
3749   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3750   PetscCall(ISRestoreIndices(iscmap, &cmap));
3751 
3752   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3753   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3754 
3755   PetscCall(PetscFree(colsub));
3756 
3757   /* save Msub, iscol_sub and iscmap used in processor for next request */
3758   if (call == MAT_INITIAL_MATRIX) {
3759     *newmat = M;
3760     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3761     PetscCall(MatDestroy(&Msub));
3762 
3763     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3764     PetscCall(ISDestroy(&iscol_sub));
3765 
3766     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3767     PetscCall(ISDestroy(&iscmap));
3768 
3769     if (iscol_local) {
3770       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3771       PetscCall(ISDestroy(&iscol_local));
3772     }
3773   }
3774   PetscFunctionReturn(PETSC_SUCCESS);
3775 }
3776 
3777 /*
3778     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3779   process, and then the end result by concatenating the local pieces.
3780   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3781 
3782   This requires a sequential iscol with all indices.
3783 */
3784 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3785 {
3786   PetscMPIInt rank, size;
3787   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3788   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3789   Mat         M, Mreuse;
3790   MatScalar  *aa, *vwork;
3791   MPI_Comm    comm;
3792   Mat_SeqAIJ *aij;
3793   PetscBool   colflag, allcolumns = PETSC_FALSE;
3794 
3795   PetscFunctionBegin;
3796   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3797   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3798   PetscCallMPI(MPI_Comm_size(comm, &size));
3799 
3800   /* Check for special case: each processor gets entire matrix columns */
3801   PetscCall(ISIdentity(iscol, &colflag));
3802   PetscCall(ISGetLocalSize(iscol, &n));
3803   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3804   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3805 
3806   if (call == MAT_REUSE_MATRIX) {
3807     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3808     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3809     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3810   } else {
3811     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3812   }
3813 
3814   /*
3815       m - number of local rows
3816       n - number of columns (same on all processors)
3817       rstart - first row in new global matrix generated
3818   */
3819   PetscCall(MatGetSize(Mreuse, &m, &n));
3820   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3821   if (call == MAT_INITIAL_MATRIX) {
3822     aij = (Mat_SeqAIJ *)Mreuse->data;
3823     ii  = aij->i;
3824     jj  = aij->j;
3825 
3826     /*
3827         Determine the number of non-zeros in the diagonal and off-diagonal
3828         portions of the matrix in order to do correct preallocation
3829     */
3830 
3831     /* first get start and end of "diagonal" columns */
3832     if (csize == PETSC_DECIDE) {
3833       PetscCall(ISGetSize(isrow, &mglobal));
3834       if (mglobal == n) { /* square matrix */
3835         nlocal = m;
3836       } else {
3837         nlocal = n / size + ((n % size) > rank);
3838       }
3839     } else {
3840       nlocal = csize;
3841     }
3842     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3843     rstart = rend - nlocal;
3844     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3845 
3846     /* next, compute all the lengths */
3847     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3848     olens = dlens + m;
3849     for (i = 0; i < m; i++) {
3850       jend = ii[i + 1] - ii[i];
3851       olen = 0;
3852       dlen = 0;
3853       for (j = 0; j < jend; j++) {
3854         if (*jj < rstart || *jj >= rend) olen++;
3855         else dlen++;
3856         jj++;
3857       }
3858       olens[i] = olen;
3859       dlens[i] = dlen;
3860     }
3861     PetscCall(MatCreate(comm, &M));
3862     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3863     PetscCall(MatSetBlockSizes(M, bs, cbs));
3864     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3865     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3866     PetscCall(PetscFree(dlens));
3867   } else {
3868     PetscInt ml, nl;
3869 
3870     M = *newmat;
3871     PetscCall(MatGetLocalSize(M, &ml, &nl));
3872     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3873     PetscCall(MatZeroEntries(M));
3874     /*
3875          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3876        rather than the slower MatSetValues().
3877     */
3878     M->was_assembled = PETSC_TRUE;
3879     M->assembled     = PETSC_FALSE;
3880   }
3881   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3882   aij = (Mat_SeqAIJ *)Mreuse->data;
3883   ii  = aij->i;
3884   jj  = aij->j;
3885 
3886   /* trigger copy to CPU if needed */
3887   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3888   for (i = 0; i < m; i++) {
3889     row   = rstart + i;
3890     nz    = ii[i + 1] - ii[i];
3891     cwork = jj;
3892     jj    = PetscSafePointerPlusOffset(jj, nz);
3893     vwork = aa;
3894     aa    = PetscSafePointerPlusOffset(aa, nz);
3895     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3896   }
3897   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3898 
3899   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3900   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3901   *newmat = M;
3902 
3903   /* save submatrix used in this process for the next request */
3904   if (call == MAT_INITIAL_MATRIX) {
3905     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3906     PetscCall(MatDestroy(&Mreuse));
3907   }
3908   PetscFunctionReturn(PETSC_SUCCESS);
3909 }
3910 
3911 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3912 {
3913   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3914   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3915   const PetscInt *JJ;
3916   PetscBool       nooffprocentries;
3917   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3918 
3919   PetscFunctionBegin;
3920   PetscCall(PetscLayoutSetUp(B->rmap));
3921   PetscCall(PetscLayoutSetUp(B->cmap));
3922   m       = B->rmap->n;
3923   cstart  = B->cmap->rstart;
3924   cend    = B->cmap->rend;
3925   rstart  = B->rmap->rstart;
3926   irstart = Ii[0];
3927 
3928   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3929 
3930   if (PetscDefined(USE_DEBUG)) {
3931     for (i = 0; i < m; i++) {
3932       nnz = Ii[i + 1] - Ii[i];
3933       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3934       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3935       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3936       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3937     }
3938   }
3939 
3940   for (i = 0; i < m; i++) {
3941     nnz     = Ii[i + 1] - Ii[i];
3942     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3943     nnz_max = PetscMax(nnz_max, nnz);
3944     d       = 0;
3945     for (j = 0; j < nnz; j++) {
3946       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3947     }
3948     d_nnz[i] = d;
3949     o_nnz[i] = nnz - d;
3950   }
3951   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3952   PetscCall(PetscFree2(d_nnz, o_nnz));
3953 
3954   for (i = 0; i < m; i++) {
3955     ii = i + rstart;
3956     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3957   }
3958   nooffprocentries    = B->nooffprocentries;
3959   B->nooffprocentries = PETSC_TRUE;
3960   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3961   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3962   B->nooffprocentries = nooffprocentries;
3963 
3964   /* count number of entries below block diagonal */
3965   PetscCall(PetscFree(Aij->ld));
3966   PetscCall(PetscCalloc1(m, &ld));
3967   Aij->ld = ld;
3968   for (i = 0; i < m; i++) {
3969     nnz = Ii[i + 1] - Ii[i];
3970     j   = 0;
3971     while (j < nnz && J[j] < cstart) j++;
3972     ld[i] = j;
3973     if (J) J += nnz;
3974   }
3975 
3976   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3977   PetscFunctionReturn(PETSC_SUCCESS);
3978 }
3979 
3980 /*@
3981   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3982   (the default parallel PETSc format).
3983 
3984   Collective
3985 
3986   Input Parameters:
3987 + B - the matrix
3988 . i - the indices into `j` for the start of each local row (indices start with zero)
3989 . j - the column indices for each local row (indices start with zero)
3990 - v - optional values in the matrix
3991 
3992   Level: developer
3993 
3994   Notes:
3995   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3996   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3997   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3998 
3999   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4000 
4001   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4002 
4003   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4004 
4005   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4006   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4007 
4008   The format used for the sparse matrix input is equivalent to a
4009   row-major ordering, i.e., for the following matrix, the input data expected is
4010   as shown
4011 .vb
4012         1 0 0
4013         2 0 3     P0
4014        -------
4015         4 5 6     P1
4016 
4017      Process0 [P0] rows_owned=[0,1]
4018         i =  {0,1,3}  [size = nrow+1  = 2+1]
4019         j =  {0,0,2}  [size = 3]
4020         v =  {1,2,3}  [size = 3]
4021 
4022      Process1 [P1] rows_owned=[2]
4023         i =  {0,3}    [size = nrow+1  = 1+1]
4024         j =  {0,1,2}  [size = 3]
4025         v =  {4,5,6}  [size = 3]
4026 .ve
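
  As a minimal sketch (error checking via `PetscCall()` omitted), P0 above could use these arrays as
.vb
  Mat B;
  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 2, PETSC_DECIDE, PETSC_DETERMINE, 3); /* P0 owns rows 0 and 1 of the 3x3 example */
  MatSetType(B, MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(B, i, j, v);
.ve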
4027 
4028 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4029           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4030 @*/
4031 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4032 {
4033   PetscFunctionBegin;
4034   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4035   PetscFunctionReturn(PETSC_SUCCESS);
4036 }
4037 
4038 /*@
4039   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4040   (the default parallel PETSc format).  For good matrix assembly performance
4041   the user should preallocate the matrix storage by setting the parameters
4042   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4043 
4044   Collective
4045 
4046   Input Parameters:
4047 + B     - the matrix
4048 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4049            (same value is used for all local rows)
4050 . d_nnz - array containing the number of nonzeros in the various rows of the
4051            DIAGONAL portion of the local submatrix (possibly different for each row)
4052            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4053            The size of this array is equal to the number of local rows, i.e. 'm'.
4054            For matrices that will be factored, you must leave room for (and set)
4055            the diagonal entry even if it is zero.
4056 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4057            submatrix (same value is used for all local rows).
4058 - o_nnz - array containing the number of nonzeros in the various rows of the
4059            OFF-DIAGONAL portion of the local submatrix (possibly different for
4060            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4061            structure. The size of this array is equal to the number
4062            of local rows, i.e. 'm'.
4063 
4064   Example Usage:
4065   Consider the following 8x8 matrix with 34 nonzero values that is
4066   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4067   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4068   as follows
4069 
4070 .vb
4071             1  2  0  |  0  3  0  |  0  4
4072     Proc0   0  5  6  |  7  0  0  |  8  0
4073             9  0 10  | 11  0  0  | 12  0
4074     -------------------------------------
4075            13  0 14  | 15 16 17  |  0  0
4076     Proc1   0 18  0  | 19 20 21  |  0  0
4077             0  0  0  | 22 23  0  | 24  0
4078     -------------------------------------
4079     Proc2  25 26 27  |  0  0 28  | 29  0
4080            30  0  0  | 31 32 33  |  0 34
4081 .ve
4082 
4083   This can be represented as a collection of submatrices as
4084 .vb
4085       A B C
4086       D E F
4087       G H I
4088 .ve
4089 
4090   Where the submatrices A,B,C are owned by proc0, D,E,F are
4091   owned by proc1, G,H,I are owned by proc2.
4092 
4093   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4094   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4095   The 'M','N' parameters are 8,8, and have the same values on all procs.
4096 
4097   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4098   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4099   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4100   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4101   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4102   matrix, and [DF] as another `MATSEQAIJ` matrix.
4103 
4104   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4105   allocated for every row of the local diagonal submatrix, and `o_nz`
4106   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4107   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4108   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4109   In this case, the values of `d_nz`, `o_nz` are
4110 .vb
4111      proc0  dnz = 2, o_nz = 2
4112      proc1  dnz = 3, o_nz = 2
4113      proc2  dnz = 1, o_nz = 4
4114 .ve
4115   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4116   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4117   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4118   34 values.
4119 
4120   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4121   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4122   In the above case the values for `d_nnz`, `o_nnz` are
4123 .vb
4124      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4125      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4126      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4127 .ve
4128   Here the space allocated is the sum of all the above values, i.e., 34, and
4129   hence the preallocation is perfect.
4130 
4131   Level: intermediate
4132 
4133   Notes:
4134   If the *_nnz parameter is given then the *_nz parameter is ignored
4135 
4136   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4137   storage.  The stored row and column indices begin with zero.
4138   See [Sparse Matrices](sec_matsparse) for details.
4139 
4140   The parallel matrix is partitioned such that the first m0 rows belong to
4141   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4142   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
4143 
4144   The DIAGONAL portion of the local submatrix of a processor can be defined
4145   as the submatrix obtained by extracting the part corresponding to
4146   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4147   first row that belongs to the processor, r2 is the last row belonging to
4148   this processor, and c1-c2 is the range of indices of the local part of a
4149   vector suitable for applying the matrix to. This is an m x n matrix. In the
4150   common case of a square matrix, the row and column ranges are the same and
4151   the DIAGONAL part is also square. The remaining portion of the local
4152   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4153 
4154   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4155 
4156   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4157   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4158   You can also run with the option `-info` and look for messages with the string
4159   malloc in them to see if additional memory allocation was needed.
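
  For instance, proc1 in the example above could preallocate `B` (assumed already created and sized) with a sketch like
.vb
  PetscInt d_nnz[3] = {3, 3, 2}, o_nnz[3] = {2, 1, 1};
  MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
.ve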
4160 
4161 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4162           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4163 @*/
4164 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4165 {
4166   PetscFunctionBegin;
4167   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4168   PetscValidType(B, 1);
4169   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4170   PetscFunctionReturn(PETSC_SUCCESS);
4171 }
4172 
4173 /*@
4174   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4175   rows in standard CSR format.
4176 
4177   Collective
4178 
4179   Input Parameters:
4180 + comm - MPI communicator
4181 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4182 . n    - This value should be the same as the local size used in creating the
4183          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
4184          calculated if `N` is given). For square matrices n is almost always `m`.
4185 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4186 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4187 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4188 . j    - global column indices
4189 - a    - optional matrix values
4190 
4191   Output Parameter:
4192 . mat - the matrix
4193 
4194   Level: intermediate
4195 
4196   Notes:
4197   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4198   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4199   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4200 
4201   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4202 
4203   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`.
4204 
4205   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4206   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4207 
4208   The format which is used for the sparse matrix input, is equivalent to a
4209   row-major ordering, i.e., for the following matrix, the input data expected is
4210   as shown
4211 .vb
4212         1 0 0
4213         2 0 3     P0
4214        -------
4215         4 5 6     P1
4216 
4217      Process0 [P0] rows_owned=[0,1]
4218         i =  {0,1,3}  [size = nrow+1  = 2+1]
4219         j =  {0,0,2}  [size = 3]
4220         v =  {1,2,3}  [size = 3]
4221 
4222      Process1 [P1] rows_owned=[2]
4223         i =  {0,3}    [size = nrow+1  = 1+1]
4224         j =  {0,1,2}  [size = 3]
4225         v =  {4,5,6}  [size = 3]
4226 .ve
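
  For P0 above, the call might look like (a sketch; error checking omitted)
.vb
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 2};
  PetscScalar v[] = {1, 2, 3};
  Mat         A;

  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, i, j, v, &A);
.ve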
4227 
4228 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4229           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4230 @*/
4231 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4232 {
4233   PetscFunctionBegin;
4234   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4235   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4236   PetscCall(MatCreate(comm, mat));
4237   PetscCall(MatSetSizes(*mat, m, n, M, N));
4238   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4239   PetscCall(MatSetType(*mat, MATMPIAIJ));
4240   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4241   PetscFunctionReturn(PETSC_SUCCESS);
4242 }
4243 
4244 /*@
4245   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
4246   rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4247   to `MatCreateMPIAIJWithArrays()`.
4248 
4249   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4250 
4251   Collective
4252 
4253   Input Parameters:
4254 + mat - the matrix
4255 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4256 . n   - This value should be the same as the local size used in creating the
4257        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4258        calculated if N is given). For square matrices n is almost always m.
4259 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4260 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4261 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4262 . J   - column indices
4263 - v   - matrix values
4264 
4265   Level: deprecated
4266 
4267 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4268           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4269 @*/
4270 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4271 {
4272   PetscInt        nnz, i;
4273   PetscBool       nooffprocentries;
4274   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4275   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4276   PetscScalar    *ad, *ao;
4277   PetscInt        ldi, Iii, md;
4278   const PetscInt *Adi = Ad->i;
4279   PetscInt       *ld  = Aij->ld;
4280 
4281   PetscFunctionBegin;
4282   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4283   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4284   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4285   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4286 
4287   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4288   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4289 
4290   for (i = 0; i < m; i++) {
4291     if (PetscDefined(USE_DEBUG)) {
4292       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4293         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4294         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4295       }
4296     }
4297     nnz = Ii[i + 1] - Ii[i];
4298     Iii = Ii[i];
4299     ldi = ld[i];
4300     md  = Adi[i + 1] - Adi[i];
4301     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4302     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4303     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4304     ad += md;
4305     ao += nnz - md;
4306   }
4307   nooffprocentries      = mat->nooffprocentries;
4308   mat->nooffprocentries = PETSC_TRUE;
4309   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4310   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4311   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4312   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4313   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4314   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4315   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4316   mat->nooffprocentries = nooffprocentries;
4317   PetscFunctionReturn(PETSC_SUCCESS);
4318 }
4319 
4320 /*@
4321   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4322 
4323   Collective
4324 
4325   Input Parameters:
4326 + mat - the matrix
4327 - v   - matrix values, stored by row
4328 
4329   Level: intermediate
4330 
4331   Notes:
4332   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4333 
4334   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly.
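
  A typical reuse cycle is sketched below (hypothetical names; `vnew` is assumed to hold the new values in the same CSR order as the original `v`)
.vb
  MatCreateMPIAIJWithArrays(comm, m, n, M, N, i, j, v, &A);
  /* ... the sparsity pattern is unchanged, only the numerical values differ ... */
  MatUpdateMPIAIJWithArray(A, vnew);
.ve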
4335 
4336 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4337           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4338 @*/
4339 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4340 {
4341   PetscInt        nnz, i, m;
4342   PetscBool       nooffprocentries;
4343   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4344   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4345   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4346   PetscScalar    *ad, *ao;
4347   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4348   PetscInt        ldi, Iii, md;
4349   PetscInt       *ld = Aij->ld;
4350 
4351   PetscFunctionBegin;
4352   m = mat->rmap->n;
4353 
4354   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4355   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4356   Iii = 0;
4357   for (i = 0; i < m; i++) {
4358     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4359     ldi = ld[i];
4360     md  = Adi[i + 1] - Adi[i];
4361     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4362     ad += md;
4363     if (ao) {
4364       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4365       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4366       ao += nnz - md;
4367     }
4368     Iii += nnz;
4369   }
4370   nooffprocentries      = mat->nooffprocentries;
4371   mat->nooffprocentries = PETSC_TRUE;
4372   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4373   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4374   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4375   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4376   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4377   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4378   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4379   mat->nooffprocentries = nooffprocentries;
4380   PetscFunctionReturn(PETSC_SUCCESS);
4381 }
4382 
4383 /*@
4384   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4385   (the default parallel PETSc format).  For good matrix assembly performance
4386   the user should preallocate the matrix storage by setting the parameters
4387   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4388 
4389   Collective
4390 
4391   Input Parameters:
4392 + comm  - MPI communicator
4393 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4394           This value should be the same as the local size used in creating the
4395           y vector for the matrix-vector product y = Ax.
4396 . n     - This value should be the same as the local size used in creating the
4397           x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
4398           calculated if N is given). For square matrices n is almost always m.
4399 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4400 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4401 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4402           (same value is used for all local rows)
4403 . d_nnz - array containing the number of nonzeros in the various rows of the
4404           DIAGONAL portion of the local submatrix (possibly different for each row)
4405           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4406           The size of this array is equal to the number of local rows, i.e. 'm'.
4407 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4408           submatrix (same value is used for all local rows).
4409 - o_nnz - array containing the number of nonzeros in the various rows of the
4410           OFF-DIAGONAL portion of the local submatrix (possibly different for
4411           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4412           structure. The size of this array is equal to the number
4413           of local rows, i.e. 'm'.
4414 
4415   Output Parameter:
4416 . A - the matrix
4417 
4418   Options Database Keys:
4419 + -mat_no_inode                     - Do not use inodes
4420 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4421 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4422                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4423                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4424 
4425   Level: intermediate
4426 
4427   Notes:
4428   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4429   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4430   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4431 
4432   If the *_nnz parameter is given then the *_nz parameter is ignored
4433 
4434   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4435   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4436   storage requirements for this matrix.
4437 
4438   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4439   processor then it must be used on all processors that share the object for
4440   that argument.
4441 
4442   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4443   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4444 
4445   The user MUST specify either the local or global matrix dimensions
4446   (possibly both).
4447 
4448   The parallel matrix is partitioned across processors such that the
4449   first `m0` rows belong to process 0, the next `m1` rows belong to
4450   process 1, the next `m2` rows belong to process 2, etc., where
4451   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4452   values corresponding to an [m x N] submatrix.
4453 
4454   The columns are logically partitioned with the n0 columns belonging
4455   to the 0th partition, the next n1 columns belonging to the next
4456   partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4457 
4458   The DIAGONAL portion of the local submatrix on any given processor
4459   is the submatrix formed by the rows and columns m,n owned by
4460   the given processor, i.e., the diagonal matrix on
4461   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4462   etc. The remaining portion of the local submatrix [m x (N-n)]
4463   constitutes the OFF-DIAGONAL portion. The example below
4464   illustrates this concept.
4465 
4466   For a square global matrix we define each processor's diagonal portion
4467   to be its local rows and the corresponding columns (a square submatrix);
4468   each processor's off-diagonal portion encompasses the remainder of the
4469   local matrix (a rectangular submatrix).
4470 
4471   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4472 
4473   When calling this routine with a single process communicator, a matrix of
4474   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4475   type of communicator, use the construction mechanism
4476 .vb
4477   MatCreate(..., &A);
4478   MatSetType(A, MATMPIAIJ);
4479   MatSetSizes(A, m, n, M, N);
4480   MatMPIAIJSetPreallocation(A, ...);
4481 .ve
4482 
4483   By default, this format uses inodes (identical nodes) when possible.
4484   We search for consecutive rows with the same nonzero structure, thereby
4485   reusing matrix information to achieve increased efficiency.
4486 
4487   Example Usage:
4488   Consider the following 8x8 matrix with 34 nonzero values that is
4489   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4490   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4491   as follows
4492 
4493 .vb
4494             1  2  0  |  0  3  0  |  0  4
4495     Proc0   0  5  6  |  7  0  0  |  8  0
4496             9  0 10  | 11  0  0  | 12  0
4497     -------------------------------------
4498            13  0 14  | 15 16 17  |  0  0
4499     Proc1   0 18  0  | 19 20 21  |  0  0
4500             0  0  0  | 22 23  0  | 24  0
4501     -------------------------------------
4502     Proc2  25 26 27  |  0  0 28  | 29  0
4503            30  0  0  | 31 32 33  |  0 34
4504 .ve
4505 
4506   This can be represented as a collection of submatrices as
4507 
4508 .vb
4509       A B C
4510       D E F
4511       G H I
4512 .ve
4513 
4514   Where the submatrices A,B,C are owned by proc0, D,E,F are
4515   owned by proc1, G,H,I are owned by proc2.
4516 
4517   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4518   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4519   The 'M','N' parameters are 8,8, and have the same values on all procs.
4520 
4521   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4522   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4523   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4524   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4525   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4526   matrix, and [DF] as another `MATSEQAIJ` matrix.
4527 
4528   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4529   allocated for every row of the local diagonal submatrix, and `o_nz`
4530   storage locations are allocated for every row of the OFF-DIAGONAL submat.
4531   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4532   the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4533   In this case, the values of `d_nz`,`o_nz` are
4534 .vb
4535      proc0  dnz = 2, o_nz = 2
4536      proc1  dnz = 3, o_nz = 2
4537      proc2  dnz = 1, o_nz = 4
4538 .ve
4539   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4540   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4541   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4542   34 values.
4543 
4544   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4545   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4546   In the above case the values for `d_nnz`, `o_nnz` are
4547 .vb
4548      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4549      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4550      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4551 .ve
4552   Here the space allocated is the sum of all the above values, i.e., 34, and
4553   hence the preallocation is perfect.
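
  Putting it together for proc1 in the example above, a minimal sketch (error checking omitted) might be
.vb
  PetscInt d_nnz[3] = {3, 3, 2}, o_nnz[3] = {2, 1, 1};
  Mat      A;
  MatCreateAIJ(comm, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve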
4554 
4555 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4556           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4557           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4558 @*/
4559 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4560 {
4561   PetscMPIInt size;
4562 
4563   PetscFunctionBegin;
4564   PetscCall(MatCreate(comm, A));
4565   PetscCall(MatSetSizes(*A, m, n, M, N));
4566   PetscCallMPI(MPI_Comm_size(comm, &size));
4567   if (size > 1) {
4568     PetscCall(MatSetType(*A, MATMPIAIJ));
4569     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4570   } else {
4571     PetscCall(MatSetType(*A, MATSEQAIJ));
4572     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4573   }
4574   PetscFunctionReturn(PETSC_SUCCESS);
4575 }
4576 
4577 /*MC
4578     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4579 
4580     Synopsis:
4581     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4582 
4583     Not Collective
4584 
4585     Input Parameter:
4586 .   A - the `MATMPIAIJ` matrix
4587 
4588     Output Parameters:
4589 +   Ad - the diagonal portion of the matrix
4590 .   Ao - the off-diagonal portion of the matrix
4591 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4592 -   ierr - error code
4593 
4594      Level: advanced
4595 
4596     Note:
4597     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4598 
4599 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4600 M*/
4601 
4602 /*MC
4603     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4604 
4605     Synopsis:
4606     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4607 
4608     Not Collective
4609 
4610     Input Parameters:
4611 +   A - the `MATMPIAIJ` matrix
4612 .   Ad - the diagonal portion of the matrix
4613 .   Ao - the off-diagonal portion of the matrix
4614 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4615 -   ierr - error code
4616 
4617      Level: advanced
4618 
4619 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4620 M*/
4621 
4622 /*@C
4623   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4624 
4625   Not Collective
4626 
4627   Input Parameter:
4628 . A - The `MATMPIAIJ` matrix
4629 
4630   Output Parameters:
4631 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4632 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4633 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4634 
4635   Level: intermediate
4636 
4637   Note:
4638   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4639   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4640   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4641   local column numbers to global column numbers in the original matrix.
4642 
4643   Fortran Notes:
4644   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4645 
4646 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4647 @*/
4648 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4649 {
4650   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4651   PetscBool   flg;
4652 
4653   PetscFunctionBegin;
4654   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4655   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4656   if (Ad) *Ad = a->A;
4657   if (Ao) *Ao = a->B;
4658   if (colmap) *colmap = a->garray;
4659   PetscFunctionReturn(PETSC_SUCCESS);
4660 }
4661 
4662 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4663 {
4664   PetscInt     m, N, i, rstart, nnz, Ii;
4665   PetscInt    *indx;
4666   PetscScalar *values;
4667   MatType      rootType;
4668 
4669   PetscFunctionBegin;
4670   PetscCall(MatGetSize(inmat, &m, &N));
4671   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4672     PetscInt *dnz, *onz, sum, bs, cbs;
4673 
4674     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4675     /* Check sum(n) = N */
4676     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4677     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4678 
4679     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4680     rstart -= m;
4681 
4682     MatPreallocateBegin(comm, m, n, dnz, onz);
4683     for (i = 0; i < m; i++) {
4684       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4685       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4686       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4687     }
4688 
4689     PetscCall(MatCreate(comm, outmat));
4690     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4691     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4692     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4693     PetscCall(MatGetRootType_Private(inmat, &rootType));
4694     PetscCall(MatSetType(*outmat, rootType));
4695     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4696     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4697     MatPreallocateEnd(dnz, onz);
4698     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4699   }
4700 
4701   /* numeric phase */
4702   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4703   for (i = 0; i < m; i++) {
4704     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4705     Ii = i + rstart;
4706     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4707     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4708   }
4709   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4710   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4711   PetscFunctionReturn(PETSC_SUCCESS);
4712 }
4713 
4714 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4715 {
4716   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4717 
4718   PetscFunctionBegin;
4719   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4720   PetscCall(PetscFree(merge->id_r));
4721   PetscCall(PetscFree(merge->len_s));
4722   PetscCall(PetscFree(merge->len_r));
4723   PetscCall(PetscFree(merge->bi));
4724   PetscCall(PetscFree(merge->bj));
4725   PetscCall(PetscFree(merge->buf_ri[0]));
4726   PetscCall(PetscFree(merge->buf_ri));
4727   PetscCall(PetscFree(merge->buf_rj[0]));
4728   PetscCall(PetscFree(merge->buf_rj));
4729   PetscCall(PetscFree(merge->coi));
4730   PetscCall(PetscFree(merge->coj));
4731   PetscCall(PetscFree(merge->owners_co));
4732   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4733   PetscCall(PetscFree(merge));
4734   PetscFunctionReturn(PETSC_SUCCESS);
4735 }
4736 
4737 #include <../src/mat/utils/freespace.h>
4738 #include <petscbt.h>
4739 
4740 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4741 {
4742   MPI_Comm             comm;
4743   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4744   PetscMPIInt          size, rank, taga, *len_s;
4745   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4746   PetscMPIInt          proc, k;
4747   PetscInt           **buf_ri, **buf_rj;
4748   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4749   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4750   MPI_Request         *s_waits, *r_waits;
4751   MPI_Status          *status;
4752   const MatScalar     *aa, *a_a;
4753   MatScalar          **abuf_r, *ba_i;
4754   Mat_Merge_SeqsToMPI *merge;
4755   PetscContainer       container;
4756 
4757   PetscFunctionBegin;
4758   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4759   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4760 
4761   PetscCallMPI(MPI_Comm_size(comm, &size));
4762   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4763 
4764   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4765   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4766   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4767   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4768   aa = a_a;
4769 
4770   bi     = merge->bi;
4771   bj     = merge->bj;
4772   buf_ri = merge->buf_ri;
4773   buf_rj = merge->buf_rj;
4774 
4775   PetscCall(PetscMalloc1(size, &status));
4776   owners = merge->rowmap->range;
4777   len_s  = merge->len_s;
4778 
4779   /* send and recv matrix values */
4780   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4781   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4782 
4783   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4784   for (proc = 0, k = 0; proc < size; proc++) {
4785     if (!len_s[proc]) continue;
4786     i = owners[proc];
4787     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4788     k++;
4789   }
4790 
4791   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4792   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4793   PetscCall(PetscFree(status));
4794 
4795   PetscCall(PetscFree(s_waits));
4796   PetscCall(PetscFree(r_waits));
4797 
4798   /* insert mat values of mpimat */
4799   PetscCall(PetscMalloc1(N, &ba_i));
4800   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4801 
4802   for (k = 0; k < merge->nrecv; k++) {
4803     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4804     nrows       = *buf_ri_k[k];
4805     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4806     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4807   }
4808 
4809   /* set values of ba */
4810   m = merge->rowmap->n;
4811   for (i = 0; i < m; i++) {
4812     arow = owners[rank] + i;
4813     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4814     bnzi = bi[i + 1] - bi[i];
4815     PetscCall(PetscArrayzero(ba_i, bnzi));
4816 
4817     /* add local non-zero vals of this proc's seqmat into ba */
4818     anzi   = ai[arow + 1] - ai[arow];
4819     aj     = a->j + ai[arow];
4820     aa     = a_a + ai[arow];
4821     nextaj = 0;
4822     for (j = 0; nextaj < anzi; j++) {
4823       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4824         ba_i[j] += aa[nextaj++];
4825       }
4826     }
4827 
4828     /* add received vals into ba */
4829     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4830       /* i-th row */
4831       if (i == *nextrow[k]) {
4832         anzi   = *(nextai[k] + 1) - *nextai[k];
4833         aj     = buf_rj[k] + *nextai[k];
4834         aa     = abuf_r[k] + *nextai[k];
4835         nextaj = 0;
4836         for (j = 0; nextaj < anzi; j++) {
4837           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4838             ba_i[j] += aa[nextaj++];
4839           }
4840         }
4841         nextrow[k]++;
4842         nextai[k]++;
4843       }
4844     }
4845     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4846   }
4847   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4848   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4849   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4850 
4851   PetscCall(PetscFree(abuf_r[0]));
4852   PetscCall(PetscFree(abuf_r));
4853   PetscCall(PetscFree(ba_i));
4854   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4855   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4856   PetscFunctionReturn(PETSC_SUCCESS);
4857 }
4858 
4859 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4860 {
4861   Mat                  B_mpi;
4862   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4863   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4864   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4865   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4866   PetscInt             len, *dnz, *onz, bs, cbs;
4867   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4868   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4869   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4870   MPI_Status          *status;
4871   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4872   PetscBT              lnkbt;
4873   Mat_Merge_SeqsToMPI *merge;
4874   PetscContainer       container;
4875 
4876   PetscFunctionBegin;
4877   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4878 
4879   /* make sure it is a PETSc comm */
4880   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4881   PetscCallMPI(MPI_Comm_size(comm, &size));
4882   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4883 
4884   PetscCall(PetscNew(&merge));
4885   PetscCall(PetscMalloc1(size, &status));
4886 
4887   /* determine row ownership */
4888   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4889   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4890   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4891   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4892   PetscCall(PetscLayoutSetUp(merge->rowmap));
4893   PetscCall(PetscMalloc1(size, &len_si));
4894   PetscCall(PetscMalloc1(size, &merge->len_s));
4895 
4896   m      = merge->rowmap->n;
4897   owners = merge->rowmap->range;
4898 
4899   /* determine the number of messages to send, their lengths */
4900   len_s = merge->len_s;
4901 
4902   len          = 0; /* length of buf_si[] */
4903   merge->nsend = 0;
4904   for (PetscMPIInt proc = 0; proc < size; proc++) {
4905     len_si[proc] = 0;
4906     if (proc == rank) {
4907       len_s[proc] = 0;
4908     } else {
4909       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4910       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4911     }
4912     if (len_s[proc]) {
4913       merge->nsend++;
4914       nrows = 0;
4915       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4916         if (ai[i + 1] > ai[i]) nrows++;
4917       }
4918       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4919       len += len_si[proc];
4920     }
4921   }
4922 
4923   /* determine the number and length of messages to receive for ij-structure */
4924   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4925   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4926 
4927   /* post the Irecv of j-structure */
4928   PetscCall(PetscCommGetNewTag(comm, &tagj));
4929   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4930 
4931   /* post the Isend of j-structure */
4932   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4933 
4934   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4935     if (!len_s[proc]) continue;
4936     i = owners[proc];
4937     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4938     k++;
4939   }
4940 
4941   /* receives and sends of j-structure are complete */
4942   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4943   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4944 
4945   /* send and recv i-structure */
4946   PetscCall(PetscCommGetNewTag(comm, &tagi));
4947   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4948 
4949   PetscCall(PetscMalloc1(len + 1, &buf_s));
4950   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4951   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4952     if (!len_s[proc]) continue;
4953     /* form outgoing message for i-structure:
4954          buf_si[0]:                 nrows to be sent
4955                [1:nrows]:           row index (global)
4956                [nrows+1:2*nrows+1]: i-structure index
4957     */
4958     nrows       = len_si[proc] / 2 - 1;
4959     buf_si_i    = buf_si + nrows + 1;
4960     buf_si[0]   = nrows;
4961     buf_si_i[0] = 0;
4962     nrows       = 0;
4963     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4964       anzi = ai[i + 1] - ai[i];
4965       if (anzi) {
4966         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4967         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4968         nrows++;
4969       }
4970     }
4971     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4972     k++;
4973     buf_si += len_si[proc];
4974   }
4975 
4976   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4977   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4978 
4979   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4980   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4981 
4982   PetscCall(PetscFree(len_si));
4983   PetscCall(PetscFree(len_ri));
4984   PetscCall(PetscFree(rj_waits));
4985   PetscCall(PetscFree2(si_waits, sj_waits));
4986   PetscCall(PetscFree(ri_waits));
4987   PetscCall(PetscFree(buf_s));
4988   PetscCall(PetscFree(status));
4989 
4990   /* compute a local seq matrix in each processor */
4991   /* allocate bi array and free space for accumulating nonzero column info */
4992   PetscCall(PetscMalloc1(m + 1, &bi));
4993   bi[0] = 0;
4994 
4995   /* create and initialize a linked list */
4996   nlnk = N + 1;
4997   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4998 
4999   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5000   len = ai[owners[rank + 1]] - ai[owners[rank]];
5001   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5002 
5003   current_space = free_space;
5004 
5005   /* determine symbolic info for each local row */
5006   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5007 
5008   for (k = 0; k < merge->nrecv; k++) {
5009     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5010     nrows       = *buf_ri_k[k];
5011     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5012     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5013   }
5014 
5015   MatPreallocateBegin(comm, m, n, dnz, onz);
5016   len = 0;
5017   for (i = 0; i < m; i++) {
5018     bnzi = 0;
5019     /* add local non-zero cols of this proc's seqmat into lnk */
5020     arow = owners[rank] + i;
5021     anzi = ai[arow + 1] - ai[arow];
5022     aj   = a->j + ai[arow];
5023     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5024     bnzi += nlnk;
5025     /* add received col data into lnk */
5026     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5027       if (i == *nextrow[k]) {            /* i-th row */
5028         anzi = *(nextai[k] + 1) - *nextai[k];
5029         aj   = buf_rj[k] + *nextai[k];
5030         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5031         bnzi += nlnk;
5032         nextrow[k]++;
5033         nextai[k]++;
5034       }
5035     }
5036     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5037 
5038     /* if free space is not available, make more free space */
5039     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5040     /* copy data into free space, then initialize lnk */
5041     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5042     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5043 
5044     current_space->array += bnzi;
5045     current_space->local_used += bnzi;
5046     current_space->local_remaining -= bnzi;
5047 
5048     bi[i + 1] = bi[i] + bnzi;
5049   }
5050 
5051   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5052 
5053   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5054   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5055   PetscCall(PetscLLDestroy(lnk, lnkbt));
5056 
5057   /* create symbolic parallel matrix B_mpi */
5058   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5059   PetscCall(MatCreate(comm, &B_mpi));
5060   if (n == PETSC_DECIDE) {
5061     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5062   } else {
5063     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5064   }
5065   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5066   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5067   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5068   MatPreallocateEnd(dnz, onz);
5069   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5070 
5071   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5072   B_mpi->assembled = PETSC_FALSE;
5073   merge->bi        = bi;
5074   merge->bj        = bj;
5075   merge->buf_ri    = buf_ri;
5076   merge->buf_rj    = buf_rj;
5077   merge->coi       = NULL;
5078   merge->coj       = NULL;
5079   merge->owners_co = NULL;
5080 
5081   PetscCall(PetscCommDestroy(&comm));
5082 
5083   /* attach the supporting struct to B_mpi for reuse */
5084   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5085   PetscCall(PetscContainerSetPointer(container, merge));
5086   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5087   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5088   PetscCall(PetscContainerDestroy(&container));
5089   *mpimat = B_mpi;
5090 
5091   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5092   PetscFunctionReturn(PETSC_SUCCESS);
5093 }
5094 
5095 /*@
5096   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5097   matrices from each processor
5098 
5099   Collective
5100 
5101   Input Parameters:
5102 + comm   - the communicator the parallel matrix will live on
5103 . seqmat - the input sequential matrix (one per MPI process)
5104 . m      - number of local rows (or `PETSC_DECIDE`)
5105 . n      - number of local columns (or `PETSC_DECIDE`)
5106 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5107 
5108   Output Parameter:
5109 . mpimat - the parallel matrix generated
5110 
5111   Level: advanced
5112 
5113   Note:
5114   The dimensions of the sequential matrix on every MPI process MUST be the same.
5115   The input `seqmat` is stored in the container "Mat_Merge_SeqsToMPI", and will be
5116   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5117 
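  Example Usage:
  A minimal sketch (illustrative; `M`, `N`, and `nz` are placeholders, and how `Aseq` is filled is up to the caller):
.vb
  Mat Aseq, Ampi;

  // every rank builds a SeqAIJ matrix with the SAME global dimensions M x N
  MatCreateSeqAIJ(PETSC_COMM_SELF, M, N, nz, NULL, &Aseq);
  // ... MatSetValues() and assembly on Aseq ...
  MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Ampi);
  // after changing the numerical values (same nonzero pattern) of Aseq, sum again in place
  MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &Ampi);
  MatDestroy(&Ampi); // Aseq, stored in the attached container, is destroyed along with Ampi
.ve
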
5118 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5119 @*/
5120 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5121 {
5122   PetscMPIInt size;
5123 
5124   PetscFunctionBegin;
5125   PetscCallMPI(MPI_Comm_size(comm, &size));
5126   if (size == 1) {
5127     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5128     if (scall == MAT_INITIAL_MATRIX) {
5129       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5130     } else {
5131       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5132     }
5133     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5134     PetscFunctionReturn(PETSC_SUCCESS);
5135   }
5136   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5137   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5138   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5139   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5140   PetscFunctionReturn(PETSC_SUCCESS);
5141 }
5142 
5143 /*@
5144   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5145 
5146   Not Collective
5147 
5148   Input Parameter:
5149 . A - the matrix
5150 
5151   Output Parameter:
5152 . A_loc - the local sequential matrix generated
5153 
5154   Level: developer
5155 
5156   Notes:
5157   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5158   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5159   `n` is the global column count obtained with `MatGetSize()`.
5160 
5161   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5162 
5163   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5164 
5165   Destroy the matrix with `MatDestroy()`.
5166 
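  Example Usage:
  A minimal sketch (illustrative):
.vb
  Mat A_loc;

  MatAIJGetLocalMat(A, &A_loc);
  // ... use A_loc as a sequential matrix ...
  MatDestroy(&A_loc);
.ve
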
5167 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5168 @*/
5169 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5170 {
5171   PetscBool mpi;
5172 
5173   PetscFunctionBegin;
5174   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5175   if (mpi) {
5176     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5177   } else {
5178     *A_loc = A;
5179     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5180   }
5181   PetscFunctionReturn(PETSC_SUCCESS);
5182 }
5183 
5184 /*@
5185   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5186 
5187   Not Collective
5188 
5189   Input Parameters:
5190 + A     - the matrix
5191 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5192 
5193   Output Parameter:
5194 . A_loc - the local sequential matrix generated
5195 
5196   Level: developer
5197 
5198   Notes:
5199   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5200   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5201   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5202 
5203   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5204 
5205   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5206   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5207   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5208   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5209 
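  Example Usage:
  A minimal sketch of the initial/reuse pattern (illustrative):
.vb
  Mat A_loc;

  MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
  // ... the values of A change, but its nonzero pattern does not ...
  MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
  MatDestroy(&A_loc);
.ve
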
5210 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5211 @*/
5212 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5213 {
5214   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5215   Mat_SeqAIJ        *mat, *a, *b;
5216   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5217   const PetscScalar *aa, *ba, *aav, *bav;
5218   PetscScalar       *ca, *cam;
5219   PetscMPIInt        size;
5220   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5221   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5222   PetscBool          match;
5223 
5224   PetscFunctionBegin;
5225   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5226   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5227   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5228   if (size == 1) {
5229     if (scall == MAT_INITIAL_MATRIX) {
5230       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5231       *A_loc = mpimat->A;
5232     } else if (scall == MAT_REUSE_MATRIX) {
5233       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5234     }
5235     PetscFunctionReturn(PETSC_SUCCESS);
5236   }
5237 
5238   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5239   a  = (Mat_SeqAIJ *)mpimat->A->data;
5240   b  = (Mat_SeqAIJ *)mpimat->B->data;
5241   ai = a->i;
5242   aj = a->j;
5243   bi = b->i;
5244   bj = b->j;
5245   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5246   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5247   aa = aav;
5248   ba = bav;
5249   if (scall == MAT_INITIAL_MATRIX) {
5250     PetscCall(PetscMalloc1(1 + am, &ci));
5251     ci[0] = 0;
5252     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5253     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5254     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5255     k = 0;
5256     for (i = 0; i < am; i++) {
5257       ncols_o = bi[i + 1] - bi[i];
5258       ncols_d = ai[i + 1] - ai[i];
5259       /* off-diagonal portion of A: global columns before the diagonal block */
5260       for (jo = 0; jo < ncols_o; jo++) {
5261         col = cmap[*bj];
5262         if (col >= cstart) break;
5263         cj[k] = col;
5264         bj++;
5265         ca[k++] = *ba++;
5266       }
5267       /* diagonal portion of A */
5268       for (j = 0; j < ncols_d; j++) {
5269         cj[k]   = cstart + *aj++;
5270         ca[k++] = *aa++;
5271       }
5272       /* off-diagonal portion of A: global columns after the diagonal block */
5273       for (j = jo; j < ncols_o; j++) {
5274         cj[k]   = cmap[*bj++];
5275         ca[k++] = *ba++;
5276       }
5277     }
5278     /* put together the new matrix */
5279     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5280     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5281     /* Since these are PETSc arrays, change flags to free them as necessary. */
5282     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5283     mat->free_a  = PETSC_TRUE;
5284     mat->free_ij = PETSC_TRUE;
5285     mat->nonew   = 0;
5286   } else if (scall == MAT_REUSE_MATRIX) {
5287     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5288     ci  = mat->i;
5289     cj  = mat->j;
5290     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5291     for (i = 0; i < am; i++) {
5292       /* off-diagonal portion of A: global columns before the diagonal block */
5293       ncols_o = bi[i + 1] - bi[i];
5294       for (jo = 0; jo < ncols_o; jo++) {
5295         col = cmap[*bj];
5296         if (col >= cstart) break;
5297         *cam++ = *ba++;
5298         bj++;
5299       }
5300       /* diagonal portion of A */
5301       ncols_d = ai[i + 1] - ai[i];
5302       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5303       /* off-diagonal portion of A: global columns after the diagonal block */
5304       for (j = jo; j < ncols_o; j++) {
5305         *cam++ = *ba++;
5306         bj++;
5307       }
5308     }
5309     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5310   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5311   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5312   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5313   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5314   PetscFunctionReturn(PETSC_SUCCESS);
5315 }
5316 
5317 /*@
5318   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5319   mlocal rows and n columns, where n is the sum of the numbers of columns of the diagonal and off-diagonal parts
5320 
5321   Not Collective
5322 
5323   Input Parameters:
5324 + A     - the matrix
5325 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5326 
5327   Output Parameters:
5328 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5329 - A_loc - the local sequential matrix generated
5330 
5331   Level: developer
5332 
5333   Note:
5334   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5335   part, then those associated with the off-diagonal part (in its local ordering)
5336 
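  Example Usage:
  A minimal sketch (illustrative); `glob` gives, for each column of `A_loc`, the corresponding global column of `A`:
.vb
  Mat A_loc;
  IS  glob;

  MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc);
  // ... local column j of A_loc corresponds to global column glob[j] of A ...
  ISDestroy(&glob);
  MatDestroy(&A_loc);
.ve
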
5337 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5338 @*/
5339 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5340 {
5341   Mat             Ao, Ad;
5342   const PetscInt *cmap;
5343   PetscMPIInt     size;
5344   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5345 
5346   PetscFunctionBegin;
5347   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5348   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5349   if (size == 1) {
5350     if (scall == MAT_INITIAL_MATRIX) {
5351       PetscCall(PetscObjectReference((PetscObject)Ad));
5352       *A_loc = Ad;
5353     } else if (scall == MAT_REUSE_MATRIX) {
5354       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5355     }
5356     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5357     PetscFunctionReturn(PETSC_SUCCESS);
5358   }
5359   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5360   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5361   if (f) {
5362     PetscCall((*f)(A, scall, glob, A_loc));
5363   } else {
5364     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5365     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5366     Mat_SeqAIJ        *c;
5367     PetscInt          *ai = a->i, *aj = a->j;
5368     PetscInt          *bi = b->i, *bj = b->j;
5369     PetscInt          *ci, *cj;
5370     const PetscScalar *aa, *ba;
5371     PetscScalar       *ca;
5372     PetscInt           i, j, am, dn, on;
5373 
5374     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5375     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5376     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5377     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5378     if (scall == MAT_INITIAL_MATRIX) {
5379       PetscInt k;
5380       PetscCall(PetscMalloc1(1 + am, &ci));
5381       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5382       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5383       ci[0] = 0;
5384       for (i = 0, k = 0; i < am; i++) {
5385         const PetscInt ncols_o = bi[i + 1] - bi[i];
5386         const PetscInt ncols_d = ai[i + 1] - ai[i];
5387         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5388         /* diagonal portion of A */
5389         for (j = 0; j < ncols_d; j++, k++) {
5390           cj[k] = *aj++;
5391           ca[k] = *aa++;
5392         }
5393         /* off-diagonal portion of A */
5394         for (j = 0; j < ncols_o; j++, k++) {
5395           cj[k] = dn + *bj++;
5396           ca[k] = *ba++;
5397         }
5398       }
5399       /* put together the new matrix */
5400       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5401       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5402       /* Since these are PETSc arrays, change flags to free them as necessary. */
5403       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5404       c->free_a  = PETSC_TRUE;
5405       c->free_ij = PETSC_TRUE;
5406       c->nonew   = 0;
5407       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5408     } else if (scall == MAT_REUSE_MATRIX) {
5409       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5410       for (i = 0; i < am; i++) {
5411         const PetscInt ncols_d = ai[i + 1] - ai[i];
5412         const PetscInt ncols_o = bi[i + 1] - bi[i];
5413         /* diagonal portion of A */
5414         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5415         /* off-diagonal portion of A */
5416         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5417       }
5418       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5419     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5420     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5421     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5422     if (glob) {
5423       PetscInt cst, *gidx;
5424 
5425       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5426       PetscCall(PetscMalloc1(dn + on, &gidx));
5427       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5428       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5429       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5430     }
5431   }
5432   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5433   PetscFunctionReturn(PETSC_SUCCESS);
5434 }
5435 
5436 /*@C
5437   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5438 
5439   Not Collective
5440 
5441   Input Parameters:
5442 + A     - the matrix
5443 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5444 . row   - index set of rows to extract (or `NULL`)
5445 - col   - index set of columns to extract (or `NULL`)
5446 
5447   Output Parameter:
5448 . A_loc - the local sequential matrix generated
5449 
5450   Level: developer
5451 
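  Example Usage:
  A minimal sketch (illustrative); passing `NULL` for `row` and `col` selects all local rows and the nonzero columns:
.vb
  Mat A_loc;

  MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc);
  MatDestroy(&A_loc);
.ve
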
5452 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5453 @*/
5454 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5455 {
5456   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5457   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5458   IS          isrowa, iscola;
5459   Mat        *aloc;
5460   PetscBool   match;
5461 
5462   PetscFunctionBegin;
5463   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5464   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5465   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5466   if (!row) {
5467     start = A->rmap->rstart;
5468     end   = A->rmap->rend;
5469     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5470   } else {
5471     isrowa = *row;
5472   }
5473   if (!col) {
5474     start = A->cmap->rstart;
5475     cmap  = a->garray;
5476     nzA   = a->A->cmap->n;
5477     nzB   = a->B->cmap->n;
5478     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5479     ncols = 0;
5480     for (i = 0; i < nzB; i++) {
5481       if (cmap[i] < start) idx[ncols++] = cmap[i];
5482       else break;
5483     }
5484     imark = i;
5485     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5486     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5487     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5488   } else {
5489     iscola = *col;
5490   }
5491   if (scall != MAT_INITIAL_MATRIX) {
5492     PetscCall(PetscMalloc1(1, &aloc));
5493     aloc[0] = *A_loc;
5494   }
5495   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5496   if (!col) { /* attach global id of condensed columns */
5497     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5498   }
5499   *A_loc = aloc[0];
5500   PetscCall(PetscFree(aloc));
5501   if (!row) PetscCall(ISDestroy(&isrowa));
5502   if (!col) PetscCall(ISDestroy(&iscola));
5503   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5504   PetscFunctionReturn(PETSC_SUCCESS);
5505 }
5506 
5507 /*
5508  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5509  * Rows can be local or remote. The routine is designed to be memory scalable, so that nothing is based
5510  * on a global size.
5511  * */
5512 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5513 {
5514   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5515   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5516   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5517   PetscMPIInt            owner;
5518   PetscSFNode           *iremote, *oiremote;
5519   const PetscInt        *lrowindices;
5520   PetscSF                sf, osf;
5521   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5522   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5523   MPI_Comm               comm;
5524   ISLocalToGlobalMapping mapping;
5525   const PetscScalar     *pd_a, *po_a;
5526 
5527   PetscFunctionBegin;
5528   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5529   /* plocalsize is the number of roots
5530    * nrows is the number of leaves
5531    * */
5532   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5533   PetscCall(ISGetLocalSize(rows, &nrows));
5534   PetscCall(PetscCalloc1(nrows, &iremote));
5535   PetscCall(ISGetIndices(rows, &lrowindices));
5536   for (i = 0; i < nrows; i++) {
5537     /* Find a remote index and an owner for a row
5538      * The row could be local or remote
5539      * */
5540     owner = 0;
5541     lidx  = 0;
5542     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5543     iremote[i].index = lidx;
5544     iremote[i].rank  = owner;
5545   }
5546   /* Create SF to communicate how many nonzero columns for each row */
5547   PetscCall(PetscSFCreate(comm, &sf));
5548   /* SF will figure out the number of nonzero columns for each row, and their
5549    * offsets
5550    * */
5551   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5552   PetscCall(PetscSFSetFromOptions(sf));
5553   PetscCall(PetscSFSetUp(sf));
5554 
5555   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5556   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5557   PetscCall(PetscCalloc1(nrows, &pnnz));
5558   roffsets[0] = 0;
5559   roffsets[1] = 0;
5560   for (i = 0; i < plocalsize; i++) {
5561     /* diagonal */
5562     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5563     /* off-diagonal */
5564     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5565     /* compute offsets so that we know the relative location of each row's data */
5566     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5567     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5568   }
5569   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5570   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5571   /* 'r' means root, and 'l' means leaf */
5572   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5573   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5574   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5575   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5576   PetscCall(PetscSFDestroy(&sf));
5577   PetscCall(PetscFree(roffsets));
5578   PetscCall(PetscFree(nrcols));
5579   dntotalcols = 0;
5580   ontotalcols = 0;
5581   ncol        = 0;
5582   for (i = 0; i < nrows; i++) {
5583     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5584     ncol    = PetscMax(pnnz[i], ncol);
5585     /* diagonal */
5586     dntotalcols += nlcols[i * 2 + 0];
5587     /* off-diagonal */
5588     ontotalcols += nlcols[i * 2 + 1];
5589   }
5590   /* We do not need to figure out the exact number of columns
5591    * since all the calculations will be done by going through the raw data
5592    * */
5593   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5594   PetscCall(MatSetUp(*P_oth));
5595   PetscCall(PetscFree(pnnz));
5596   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5597   /* diagonal */
5598   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5599   /* off-diagonal */
5600   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5601   /* diagonal */
5602   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5603   /* off-diagonal */
5604   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5605   dntotalcols = 0;
5606   ontotalcols = 0;
5607   ntotalcols  = 0;
5608   for (i = 0; i < nrows; i++) {
5609     owner = 0;
5610     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5611     /* Set iremote for diag matrix */
5612     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5613       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5614       iremote[dntotalcols].rank  = owner;
5615       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5616       ilocal[dntotalcols++] = ntotalcols++;
5617     }
5618     /* off-diagonal */
5619     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5620       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5621       oiremote[ontotalcols].rank  = owner;
5622       oilocal[ontotalcols++]      = ntotalcols++;
5623     }
5624   }
5625   PetscCall(ISRestoreIndices(rows, &lrowindices));
5626   PetscCall(PetscFree(loffsets));
5627   PetscCall(PetscFree(nlcols));
5628   PetscCall(PetscSFCreate(comm, &sf));
5629   /* P serves as the roots and P_oth as the leaves;
5630    * this SF handles the diagonal matrix
5631    * */
5632   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5633   PetscCall(PetscSFSetFromOptions(sf));
5634   PetscCall(PetscSFSetUp(sf));
5635 
5636   PetscCall(PetscSFCreate(comm, &osf));
5637   /* off-diagonal */
5638   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5639   PetscCall(PetscSFSetFromOptions(osf));
5640   PetscCall(PetscSFSetUp(osf));
5641   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5642   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5643   /* operate on the matrix internal data to save memory */
5644   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5645   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5646   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5647   /* Convert to global indices for diag matrix */
5648   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5649   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5650   /* We want P_oth to store global indices */
5651   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5652   /* Use memory scalable approach */
5653   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5654   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5655   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5656   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5657   /* Convert back to local indices */
5658   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5659   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5660   nout = 0;
5661   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5662   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
5663   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5664   /* Exchange values */
5665   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5666   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5667   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5668   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5669   /* Stop PETSc from shrinking memory */
5670   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5671   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5672   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5673   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5674   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5675   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5676   PetscCall(PetscSFDestroy(&sf));
5677   PetscCall(PetscSFDestroy(&osf));
5678   PetscFunctionReturn(PETSC_SUCCESS);
5679 }
5680 
5681 /*
5682  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local A
5683  * This supports MPIAIJ and MAIJ
5684  * */
5685 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5686 {
5687   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5688   Mat_SeqAIJ *p_oth;
5689   IS          rows, map;
5690   PetscHMapI  hamp;
5691   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5692   MPI_Comm    comm;
5693   PetscSF     sf, osf;
5694   PetscBool   has;
5695 
5696   PetscFunctionBegin;
5697   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5698   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5699   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5700    *  and then create a submatrix (that often is an overlapping matrix)
5701    * */
5702   if (reuse == MAT_INITIAL_MATRIX) {
5703     /* Use a hash table to figure out unique keys */
5704     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5705     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5706     count = 0;
5707     /* Assume that a->garray is sorted; otherwise the following does not make sense */
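    /* Illustrative example: with dof = 2 and a->garray = {4, 5, 8}, the keys are {2, 2, 4},
       so mapping becomes {0, 0, 1} and count ends at 2 */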
5708     for (i = 0; i < a->B->cmap->n; i++) {
5709       key = a->garray[i] / dof;
5710       PetscCall(PetscHMapIHas(hamp, key, &has));
5711       if (!has) {
5712         mapping[i] = count;
5713         PetscCall(PetscHMapISet(hamp, key, count++));
5714       } else {
5715         /* Current 'i' has the same key as the previous step */
5716         mapping[i] = count - 1;
5717       }
5718     }
5719     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5720     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5721     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, "Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5722     PetscCall(PetscCalloc1(htsize, &rowindices));
5723     off = 0;
5724     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5725     PetscCall(PetscHMapIDestroy(&hamp));
5726     PetscCall(PetscSortInt(htsize, rowindices));
5727     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5728     /* In case the matrix was already created and the user wants to recreate it */
5729     PetscCall(MatDestroy(P_oth));
5730     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5731     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5732     PetscCall(ISDestroy(&map));
5733     PetscCall(ISDestroy(&rows));
5734   } else if (reuse == MAT_REUSE_MATRIX) {
5735     /* If the matrix was already created, we simply update its values using the SF objects
5736      * that were attached to the matrix earlier.
5737      */
5738     const PetscScalar *pd_a, *po_a;
5739 
5740     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5741     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5742     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5743     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5744     /* Update values in place */
5745     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5746     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5747     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5748     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5749     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5750     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5751     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5752     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5753   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5754   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5755   PetscFunctionReturn(PETSC_SUCCESS);
5756 }
5757 
5758 /*@C
5759   MatGetBrowsOfAcols - Returns `IS` objects that contain the rows of `B` that correspond to the nonzero columns of the local `A`
5760 
5761   Collective
5762 
5763   Input Parameters:
5764 + A     - the first matrix in `MATMPIAIJ` format
5765 . B     - the second matrix in `MATMPIAIJ` format
5766 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5767 
5768   Output Parameters:
5769 + rowb  - on input, the index set of rows of B to extract (or `NULL`); modified on output
5770 . colb  - on input, the index set of columns of B to extract (or `NULL`); modified on output
5771 - B_seq - the sequential matrix generated
5772 
5773   Level: developer
5774 
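  Example Usage:
  A minimal sketch (illustrative); the index sets created by the first call are reused by the second:
.vb
  Mat B_seq;
  IS  rowb, colb;

  MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq);
  // ... the values of B change, but its nonzero pattern does not ...
  MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq);
  ISDestroy(&rowb);
  ISDestroy(&colb);
  MatDestroy(&B_seq);
.ve
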
5775 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5776 @*/
5777 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5778 {
5779   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5780   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5781   IS          isrowb, iscolb;
5782   Mat        *bseq = NULL;
5783 
5784   PetscFunctionBegin;
5785   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5786              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5787   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5788 
5789   if (scall == MAT_INITIAL_MATRIX) {
5790     start = A->cmap->rstart;
5791     cmap  = a->garray;
5792     nzA   = a->A->cmap->n;
5793     nzB   = a->B->cmap->n;
5794     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5795     ncols = 0;
5796     for (i = 0; i < nzB; i++) { /* row < local row index */
5797       if (cmap[i] < start) idx[ncols++] = cmap[i];
5798       else break;
5799     }
5800     imark = i;
5801     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5802     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5803     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5804     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5805   } else {
5806     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5807     isrowb = *rowb;
5808     iscolb = *colb;
5809     PetscCall(PetscMalloc1(1, &bseq));
5810     bseq[0] = *B_seq;
5811   }
5812   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5813   *B_seq = bseq[0];
5814   PetscCall(PetscFree(bseq));
5815   if (!rowb) {
5816     PetscCall(ISDestroy(&isrowb));
5817   } else {
5818     *rowb = isrowb;
5819   }
5820   if (!colb) {
5821     PetscCall(ISDestroy(&iscolb));
5822   } else {
5823     *colb = iscolb;
5824   }
5825   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5826   PetscFunctionReturn(PETSC_SUCCESS);
5827 }
5828 
5829 /*
5830     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5831     of the OFF-DIAGONAL portion of local A
5832 
5833     Collective
5834 
5835    Input Parameters:
5836 +    A,B - the matrices in `MATMPIAIJ` format
5837 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5838 
5839    Output Parameters:
5840 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5841 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5842 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5843 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5844 
5845     Developer Note:
5846     This directly accesses information inside the VecScatter associated with the matrix-vector product
5847      for this matrix. This is not desirable.
5848 
5849     Level: developer
5850 
5851 */
5852 
5853 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5854 {
5855   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5856   VecScatter         ctx;
5857   MPI_Comm           comm;
5858   const PetscMPIInt *rprocs, *sprocs;
5859   PetscMPIInt        nrecvs, nsends;
5860   const PetscInt    *srow, *rstarts, *sstarts;
5861   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5862   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5863   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5864   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5865   PetscMPIInt        size, tag, rank, nreqs;
5866 
5867   PetscFunctionBegin;
5868   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5869   PetscCallMPI(MPI_Comm_size(comm, &size));
5870 
5871   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5872              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5873   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5874   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5875 
5876   if (size == 1) {
5877     startsj_s = NULL;
5878     bufa_ptr  = NULL;
5879     *B_oth    = NULL;
    PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); /* balance the event begun above before the early return */
5880     PetscFunctionReturn(PETSC_SUCCESS);
5881   }
5882 
5883   ctx = a->Mvctx;
5884   tag = ((PetscObject)ctx)->tag;
5885 
5886   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5887   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5888   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5889   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5890   PetscCall(PetscMalloc1(nreqs, &reqs));
5891   rwaits = reqs;
5892   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5893 
5894   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5895   if (scall == MAT_INITIAL_MATRIX) {
5896     /* i-array */
5897     /*  post receives */
5898     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5899     for (i = 0; i < nrecvs; i++) {
5900       rowlen = rvalues + rstarts[i] * rbs;
5901       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5902       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5903     }
5904 
5905     /* pack the outgoing message */
5906     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5907 
5908     sstartsj[0] = 0;
5909     rstartsj[0] = 0;
5910     len         = 0; /* total length of j or a array to be sent */
5911     if (nsends) {
5912       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5913       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5914     }
5915     for (i = 0; i < nsends; i++) {
5916       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5917       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5918       for (j = 0; j < nrows; j++) {
5919         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5920         for (l = 0; l < sbs; l++) {
5921           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5922 
5923           rowlen[j * sbs + l] = ncols;
5924 
5925           len += ncols;
5926           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5927         }
5928         k++;
5929       }
5930       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5931 
5932       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5933     }
5934     /* recvs and sends of i-array are completed */
5935     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5936     PetscCall(PetscFree(svalues));
5937 
5938     /* allocate buffers for sending j and a arrays */
5939     PetscCall(PetscMalloc1(len + 1, &bufj));
5940     PetscCall(PetscMalloc1(len + 1, &bufa));
5941 
5942     /* create i-array of B_oth */
5943     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5944 
5945     b_othi[0] = 0;
5946     len       = 0; /* total length of j or a array to be received */
5947     k         = 0;
5948     for (i = 0; i < nrecvs; i++) {
5949       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5950       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5951       for (j = 0; j < nrows; j++) {
5952         b_othi[k + 1] = b_othi[k] + rowlen[j];
5953         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5954         k++;
5955       }
5956       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5957     }
5958     PetscCall(PetscFree(rvalues));
5959 
5960     /* allocate space for j and a arrays of B_oth */
5961     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5962     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5963 
5964     /* j-array */
5965     /*  post receives of j-array */
5966     for (i = 0; i < nrecvs; i++) {
5967       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5968       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5969     }
5970 
5971     /* pack the outgoing message j-array */
5972     if (nsends) k = sstarts[0];
5973     for (i = 0; i < nsends; i++) {
5974       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5975       bufJ  = bufj + sstartsj[i];
5976       for (j = 0; j < nrows; j++) {
5977         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5978         for (ll = 0; ll < sbs; ll++) {
5979           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5980           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5981           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5982         }
5983       }
5984       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5985     }
5986 
5987     /* recvs and sends of j-array are completed */
5988     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5989   } else if (scall == MAT_REUSE_MATRIX) {
5990     sstartsj = *startsj_s;
5991     rstartsj = *startsj_r;
5992     bufa     = *bufa_ptr;
5993     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5994   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5995 
5996   /* a-array */
5997   /*  post receives of a-array */
5998   for (i = 0; i < nrecvs; i++) {
5999     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
6000     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
6001   }
6002 
6003   /* pack the outgoing message a-array */
6004   if (nsends) k = sstarts[0];
6005   for (i = 0; i < nsends; i++) {
6006     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6007     bufA  = bufa + sstartsj[i];
6008     for (j = 0; j < nrows; j++) {
6009       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6010       for (ll = 0; ll < sbs; ll++) {
6011         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6012         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6013         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6014       }
6015     }
6016     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6017   }
6018   /* recvs and sends of a-array are completed */
6019   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6020   PetscCall(PetscFree(reqs));
6021 
6022   if (scall == MAT_INITIAL_MATRIX) {
6023     Mat_SeqAIJ *b_oth;
6024 
6025     /* put together the new matrix */
6026     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6027 
6028     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6029     /* Since these are PETSc arrays, change flags to free them as necessary. */
6030     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6031     b_oth->free_a  = PETSC_TRUE;
6032     b_oth->free_ij = PETSC_TRUE;
6033     b_oth->nonew   = 0;
6034 
6035     PetscCall(PetscFree(bufj));
6036     if (!startsj_s || !bufa_ptr) {
6037       PetscCall(PetscFree2(sstartsj, rstartsj));
6038       PetscCall(PetscFree(bufa));
6039     } else {
6040       *startsj_s = sstartsj;
6041       *startsj_r = rstartsj;
6042       *bufa_ptr  = bufa;
6043     }
6044   } else if (scall == MAT_REUSE_MATRIX) {
6045     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6046   }
6047 
6048   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6049   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6050   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6051   PetscFunctionReturn(PETSC_SUCCESS);
6052 }
6053 
6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6057 #if defined(PETSC_HAVE_MKL_SPARSE)
6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6059 #endif
6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6062 #if defined(PETSC_HAVE_ELEMENTAL)
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6064 #endif
6065 #if defined(PETSC_HAVE_SCALAPACK)
6066 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6067 #endif
6068 #if defined(PETSC_HAVE_HYPRE)
6069 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6070 #endif
6071 #if defined(PETSC_HAVE_CUDA)
6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6073 #endif
6074 #if defined(PETSC_HAVE_HIP)
6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6076 #endif
6077 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6078 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6079 #endif
6080 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6081 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6082 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6083 
6084 /*
6085     Computes (B'*A')' since computing A*B directly is untenable
6086 
6087                n                       p                          p
6088         [             ]       [             ]         [                 ]
6089       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6090         [             ]       [             ]         [                 ]
6091 
6092 */
6093 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6094 {
6095   Mat At, Bt, Ct;
6096 
6097   PetscFunctionBegin;
6098   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6099   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6100   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6101   PetscCall(MatDestroy(&At));
6102   PetscCall(MatDestroy(&Bt));
6103   PetscCall(MatTransposeSetPrecursor(Ct, C));
6104   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6105   PetscCall(MatDestroy(&Ct));
6106   PetscFunctionReturn(PETSC_SUCCESS);
6107 }
6108 
6109 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6110 {
6111   PetscBool cisdense;
6112 
6113   PetscFunctionBegin;
6114   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6115   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6116   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6117   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6118   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6119   PetscCall(MatSetUp(C));
6120 
6121   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6122   PetscFunctionReturn(PETSC_SUCCESS);
6123 }
6124 
6125 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6126 {
6127   Mat_Product *product = C->product;
6128   Mat          A = product->A, B = product->B;
6129 
6130   PetscFunctionBegin;
6131   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6132              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6133   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6134   C->ops->productsymbolic = MatProductSymbolic_AB;
6135   PetscFunctionReturn(PETSC_SUCCESS);
6136 }
6137 
6138 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6139 {
6140   Mat_Product *product = C->product;
6141 
6142   PetscFunctionBegin;
6143   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6144   PetscFunctionReturn(PETSC_SUCCESS);
6145 }
6146 
6147 /*
6148    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6149 
6150   Input Parameters:
6151 
6152     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6153     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6154 
6155     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6156 
6157     For Set1, j1[] contains column indices of the nonzeros.
6158     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6159     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6160     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6161 
6162     Similar for Set2.
6163 
6164     This routine merges the two sets of nonzeros row by row and removes repeats.
6165 
6166   Output Parameters: (memory is allocated by the caller)
6167 
6168     i[],j[]: the CSR of the merged matrix, which has m rows.
6169     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6170     imap2[]: similar to imap1[], but for Set2.
6171     Note we order nonzeros row-by-row and from left to right.
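
    A worked single-row example (illustrative values): with j1 = [1,1,4] (jmap1 = [0,2,3], i.e. column 1
    is repeated twice) and j2 = [2,4] (jmap2 = [0,1,2]), the merged row is j = [1,2,4], with
    imap1 = [0,2] and imap2 = [1,2].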
6172 */
6173 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6174 {
6175   PetscInt   r, m; /* r: row index; m: number of local rows of mat */
6176   PetscCount t, t1, t2, b1, e1, b2, e2;
6177 
6178   PetscFunctionBegin;
6179   PetscCall(MatGetLocalSize(mat, &m, NULL));
6180   t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6181   i[0]        = 0;
6182   for (r = 0; r < m; r++) { /* Do row by row merging */
6183     b1 = rowBegin1[r];
6184     e1 = rowEnd1[r];
6185     b2 = rowBegin2[r];
6186     e2 = rowEnd2[r];
6187     while (b1 < e1 && b2 < e2) {
6188       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6189         j[t]      = j1[b1];
6190         imap1[t1] = t;
6191         imap2[t2] = t;
6192         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */
6193         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */
6194         t1++;
6195         t2++;
6196         t++;
6197       } else if (j1[b1] < j2[b2]) {
6198         j[t]      = j1[b1];
6199         imap1[t1] = t;
6200         b1 += jmap1[t1 + 1] - jmap1[t1];
6201         t1++;
6202         t++;
6203       } else {
6204         j[t]      = j2[b2];
6205         imap2[t2] = t;
6206         b2 += jmap2[t2 + 1] - jmap2[t2];
6207         t2++;
6208         t++;
6209       }
6210     }
6211     /* Merge the remaining in either j1[] or j2[] */
6212     while (b1 < e1) {
6213       j[t]      = j1[b1];
6214       imap1[t1] = t;
6215       b1 += jmap1[t1 + 1] - jmap1[t1];
6216       t1++;
6217       t++;
6218     }
6219     while (b2 < e2) {
6220       j[t]      = j2[b2];
6221       imap2[t2] = t;
6222       b2 += jmap2[t2 + 1] - jmap2[t2];
6223       t2++;
6224       t++;
6225     }
6226     PetscCall(PetscIntCast(t, i + r + 1));
6227   }
6228   PetscFunctionReturn(PETSC_SUCCESS);
6229 }
6230 
6231 /*
6232   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6233 
6234   Input Parameters:
6235     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6236     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6237       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6238 
6239       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6240       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6241 
6242   Output Parameters:
6243     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6244     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6245       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6246       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6247 
6248     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6249       Atot: number of entries belonging to the diagonal block.
6250       Annz: number of unique nonzeros belonging to the diagonal block.
6251       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6252         repeats (i.e., same 'i,j' pair).
6253       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6254         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6258 
6259     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6260 
6261     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
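
    Example (a hypothetical layout, for illustration): suppose this rank owns rows [0,2) and diagonal columns [0,2),
    and is given the n = 5 entries (i,j) = (0,3),(0,0),(0,0),(1,1),(1,2) with perm[] = [0,1,2,3,4]. After each row is
    sorted with the diag-first shift, row 0 holds j = [0,0,3] and row 1 holds j = [1,2], so that
      rowBegin[] = [0,3], rowMid[] = [2,4], rowEnd[] = [3,5]
      Atot = 3, Annz = 2, Ajmap[] = [0,2,3], Aperm[] = [1,2,3] (up to the ordering of the duplicate entries)
      Btot = 2, Bnnz = 2, Bjmap[] = [0,1,2], Bperm[] = [0,4]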
6262 */
6263 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6264 {
6265   PetscInt    cstart, cend, rstart, rend, row, col;
6266   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6267   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6268   PetscCount  k, m, p, q, r, s, mid;
6269   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6270 
6271   PetscFunctionBegin;
6272   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6273   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6274   m = rend - rstart;
6275 
6276   /* Skip negative rows */
6277   for (k = 0; k < n; k++)
6278     if (i[k] >= 0) break;
6279 
6280   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6281      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6282   */
6283   while (k < n) {
6284     row = i[k];
6285     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6286     for (s = k; s < n; s++)
6287       if (i[s] != row) break;
6288 
6289     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6290     for (p = k; p < s; p++) {
6291       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6292       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6293     }
6294     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6295     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6296     rowBegin[row - rstart] = k;
6297     rowMid[row - rstart]   = mid;
6298     rowEnd[row - rstart]   = s;
6299 
6300     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6301     Atot += mid - k;
6302     Btot += s - mid;
6303 
6304     /* Count unique nonzeros of this diag row */
6305     for (p = k; p < mid;) {
6306       col = j[p];
6307       do {
6308         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6309         p++;
6310       } while (p < mid && j[p] == col);
6311       Annz++;
6312     }
6313 
6314     /* Count unique nonzeros of this offdiag row */
6315     for (p = mid; p < s;) {
6316       col = j[p];
6317       do {
6318         p++;
6319       } while (p < s && j[p] == col);
6320       Bnnz++;
6321     }
6322     k = s;
6323   }
6324 
6325   /* Allocation according to Atot, Btot, Annz, Bnnz */
6326   PetscCall(PetscMalloc1(Atot, &Aperm));
6327   PetscCall(PetscMalloc1(Btot, &Bperm));
6328   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6329   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6330 
6331   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6332   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6333   for (r = 0; r < m; r++) {
6334     k   = rowBegin[r];
6335     mid = rowMid[r];
6336     s   = rowEnd[r];
6337     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6338     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6339     Atot += mid - k;
6340     Btot += s - mid;
6341 
6342     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6343     for (p = k; p < mid;) {
6344       col = j[p];
6345       q   = p;
6346       do {
6347         p++;
6348       } while (p < mid && j[p] == col);
6349       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6350       Annz++;
6351     }
6352 
6353     for (p = mid; p < s;) {
6354       col = j[p];
6355       q   = p;
6356       do {
6357         p++;
6358       } while (p < s && j[p] == col);
6359       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6360       Bnnz++;
6361     }
6362   }
6363   /* Output */
6364   *Aperm_ = Aperm;
6365   *Annz_  = Annz;
6366   *Atot_  = Atot;
6367   *Ajmap_ = Ajmap;
6368   *Bperm_ = Bperm;
6369   *Bnnz_  = Bnnz;
6370   *Btot_  = Btot;
6371   *Bjmap_ = Bjmap;
6372   PetscFunctionReturn(PETSC_SUCCESS);
6373 }
6374 
6375 /*
6376   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6377 
6378   Input Parameters:
6379     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6380     nnz:  number of unique nonzeros in the merged matrix
6381     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6382     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6383 
6384   Output Parameter: (memory is allocated by the caller)
6385     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6386 
6387   Example:
6388     nnz1 = 4
6389     nnz  = 6
6390     imap = [1,3,4,5]
6391     jmap = [0,3,5,6,7]
6392    then,
6393     jmap_new = [0,0,3,3,5,6,7]
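    (Merged nonzeros 0 and 2 are absent from the set, so they get zero repeats:
     jmap_new[1] - jmap_new[0] = 0 and jmap_new[3] - jmap_new[2] = 0.)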
6394 */
6395 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6396 {
6397   PetscCount k, p;
6398 
6399   PetscFunctionBegin;
6400   jmap_new[0] = 0;
6401   p           = nnz;                /* p loops over jmap_new[] backwards */
6402   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6403     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6404   }
6405   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6406   PetscFunctionReturn(PETSC_SUCCESS);
6407 }
6408 
6409 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6410 {
6411   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6412 
6413   PetscFunctionBegin;
6414   PetscCall(PetscSFDestroy(&coo->sf));
6415   PetscCall(PetscFree(coo->Aperm1));
6416   PetscCall(PetscFree(coo->Bperm1));
6417   PetscCall(PetscFree(coo->Ajmap1));
6418   PetscCall(PetscFree(coo->Bjmap1));
6419   PetscCall(PetscFree(coo->Aimap2));
6420   PetscCall(PetscFree(coo->Bimap2));
6421   PetscCall(PetscFree(coo->Aperm2));
6422   PetscCall(PetscFree(coo->Bperm2));
6423   PetscCall(PetscFree(coo->Ajmap2));
6424   PetscCall(PetscFree(coo->Bjmap2));
6425   PetscCall(PetscFree(coo->Cperm1));
6426   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6427   PetscCall(PetscFree(coo));
6428   PetscFunctionReturn(PETSC_SUCCESS);
6429 }
6430 
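/* A minimal user-level sketch (with illustrative entries) of the COO assembly path that reaches this
   routine through MatSetPreallocationCOO(), and MatSetValuesCOO_MPIAIJ() below through MatSetValuesCOO():

     PetscInt    coo_i[] = {0, 0, 1};
     PetscInt    coo_j[] = {0, 1, 1};
     PetscScalar coo_v[] = {1.0, 2.0, 3.0};

     PetscCall(MatSetPreallocationCOO(A, 3, coo_i, coo_j)); // note: this routine sorts coo_i/coo_j in place
     PetscCall(MatSetValuesCOO(A, coo_v, INSERT_VALUES));
*/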
6431 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6432 {
6433   MPI_Comm             comm;
6434   PetscMPIInt          rank, size;
6435   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6436   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6437   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6438   PetscContainer       container;
6439   MatCOOStruct_MPIAIJ *coo;
6440 
6441   PetscFunctionBegin;
6442   PetscCall(PetscFree(mpiaij->garray));
6443   PetscCall(VecDestroy(&mpiaij->lvec));
6444 #if defined(PETSC_USE_CTABLE)
6445   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6446 #else
6447   PetscCall(PetscFree(mpiaij->colmap));
6448 #endif
6449   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6450   mat->assembled     = PETSC_FALSE;
6451   mat->was_assembled = PETSC_FALSE;
6452 
6453   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6454   PetscCallMPI(MPI_Comm_size(comm, &size));
6455   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6456   PetscCall(PetscLayoutSetUp(mat->rmap));
6457   PetscCall(PetscLayoutSetUp(mat->cmap));
6458   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6459   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6460   PetscCall(MatGetLocalSize(mat, &m, &n));
6461   PetscCall(MatGetSize(mat, &M, &N));
6462 
6463   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6464   /* entries come first, then local rows, then remote rows.                     */
6465   PetscCount n1 = coo_n, *perm1;
6466   PetscInt  *i1 = coo_i, *j1 = coo_j;
6467 
6468   PetscCall(PetscMalloc1(n1, &perm1));
6469   for (k = 0; k < n1; k++) perm1[k] = k;
6470 
6471   /* Manipulate indices so that entries with negative row or col indices will have smallest
6472      row indices, local entries will have greater but negative row indices, and remote entries
6473      will have positive row indices.
6474   */
6475   for (k = 0; k < n1; k++) {
6476     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6477     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6478     else {
6479       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set, but entries are being inserted into remote rows");
6480       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6481     }
6482   }
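  /* For illustration (hypothetical values): with rstart = 4, rend = 8, an ignored entry becomes PETSC_INT_MIN,
     local row 5 becomes 5 - PETSC_INT_MAX (negative, yet greater than PETSC_INT_MIN), and remote row 9 keeps
     its value, so the sort below produces the order [ignored | local | remote] */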
6483 
6484   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6485   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6486 
6487   /* Advance k to the first entry we need to take care of */
6488   for (k = 0; k < n1; k++)
6489     if (i1[k] > PETSC_INT_MIN) break;
6490   PetscCount i1start = k;
6491 
6492   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6493   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6494 
6495   /*           Send remote rows to their owner                                  */
6496   /* Find which rows should be sent to which remote ranks*/
6497   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6498   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6499   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6500   const PetscInt *ranges;
6501   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6502 
6503   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6504   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6505   for (k = rem; k < n1;) {
6506     PetscMPIInt owner;
6507     PetscInt    firstRow, lastRow;
6508 
6509     /* Locate a row range */
6510     firstRow = i1[k]; /* first row of this owner */
6511     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6512     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6513 
6514     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6515     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6516 
6517     /* All entries in [k,p) belong to this remote owner */
6518     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6519       PetscMPIInt *sendto2;
6520       PetscInt    *nentries2;
6521       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6522 
6523       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6524       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6525       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6526       PetscCall(PetscFree2(sendto, nentries));
6527       sendto   = sendto2;
6528       nentries = nentries2;
6529       maxNsend = maxNsend2;
6530     }
6531     sendto[nsend] = owner;
6532     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6533     nsend++;
6534     k = p;
6535   }
6536 
6537   /* Build 1st SF to know offsets on remote to send data */
6538   PetscSF      sf1;
6539   PetscInt     nroots = 1, nroots2 = 0;
6540   PetscInt     nleaves = nsend, nleaves2 = 0;
6541   PetscInt    *offsets;
6542   PetscSFNode *iremote;
6543 
6544   PetscCall(PetscSFCreate(comm, &sf1));
6545   PetscCall(PetscMalloc1(nsend, &iremote));
6546   PetscCall(PetscMalloc1(nsend, &offsets));
6547   for (k = 0; k < nsend; k++) {
6548     iremote[k].rank  = sendto[k];
6549     iremote[k].index = 0;
6550     nleaves2 += nentries[k];
6551     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6552   }
6553   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6554   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6555   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6556   PetscCall(PetscSFDestroy(&sf1));
6557   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6558 
6559   /* Build 2nd SF to send remote COOs to their owner */
6560   PetscSF sf2;
6561   nroots  = nroots2;
6562   nleaves = nleaves2;
6563   PetscCall(PetscSFCreate(comm, &sf2));
6564   PetscCall(PetscSFSetFromOptions(sf2));
6565   PetscCall(PetscMalloc1(nleaves, &iremote));
6566   p = 0;
6567   for (k = 0; k < nsend; k++) {
6568     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6569     for (q = 0; q < nentries[k]; q++, p++) {
6570       iremote[p].rank = sendto[k];
6571       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6572     }
6573   }
6574   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6575 
6576   /* Send the remote COOs to their owner */
6577   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6578   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6579   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6580   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6581   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6582   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6583   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6584   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6585   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6586   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6587   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6588 
6589   PetscCall(PetscFree(offsets));
6590   PetscCall(PetscFree2(sendto, nentries));
6591 
6592   /* Sort received COOs by row along with the permutation array     */
6593   for (k = 0; k < n2; k++) perm2[k] = k;
6594   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6595 
6596   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6597   PetscCount *Cperm1;
6598   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6599   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6600   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6601   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6602 
6603   /* Support for HYPRE matrices, kind of a hack.
6604      Swap min column with diagonal so that diagonal values will go first */
6605   PetscBool hypre;
6606   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6607   if (hypre) {
6608     PetscInt *minj;
6609     PetscBT   hasdiag;
6610 
6611     PetscCall(PetscBTCreate(m, &hasdiag));
6612     PetscCall(PetscMalloc1(m, &minj));
6613     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6614     for (k = i1start; k < rem; k++) {
6615       if (j1[k] < cstart || j1[k] >= cend) continue;
6616       const PetscInt rindex = i1[k] - rstart;
6617       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6618       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6619     }
6620     for (k = 0; k < n2; k++) {
6621       if (j2[k] < cstart || j2[k] >= cend) continue;
6622       const PetscInt rindex = i2[k] - rstart;
6623       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6624       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6625     }
6626     for (k = i1start; k < rem; k++) {
6627       const PetscInt rindex = i1[k] - rstart;
6628       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6629       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6630       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6631     }
6632     for (k = 0; k < n2; k++) {
6633       const PetscInt rindex = i2[k] - rstart;
6634       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6635       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6636       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6637     }
6638     PetscCall(PetscBTDestroy(&hasdiag));
6639     PetscCall(PetscFree(minj));
6640   }
6641 
6642   /* Split local COOs and received COOs into diag/offdiag portions */
6643   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6644   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6645   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6646   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6647   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6648   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6649 
6650   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6651   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6652   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6653   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6654 
6655   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6656   PetscInt *Ai, *Bi;
6657   PetscInt *Aj, *Bj;
6658 
6659   PetscCall(PetscMalloc1(m + 1, &Ai));
6660   PetscCall(PetscMalloc1(m + 1, &Bi));
6661   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6662   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6663 
6664   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6665   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6666   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6667   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6668   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6669 
6670   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6671   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6672 
6673   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6674   /* expect most nonzeros in A/B to have local contributing entries            */
6675   PetscInt    Annz = Ai[m];
6676   PetscInt    Bnnz = Bi[m];
6677   PetscCount *Ajmap1_new, *Bjmap1_new;
6678 
6679   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6680   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6681 
6682   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6683   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6684 
6685   PetscCall(PetscFree(Aimap1));
6686   PetscCall(PetscFree(Ajmap1));
6687   PetscCall(PetscFree(Bimap1));
6688   PetscCall(PetscFree(Bjmap1));
6689   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6690   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6691   PetscCall(PetscFree(perm1));
6692   PetscCall(PetscFree3(i2, j2, perm2));
6693 
6694   Ajmap1 = Ajmap1_new;
6695   Bjmap1 = Bjmap1_new;
6696 
6697   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6698   if (Annz < Annz1 + Annz2) {
6699     PetscInt *Aj_new;
6700     PetscCall(PetscMalloc1(Annz, &Aj_new));
6701     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6702     PetscCall(PetscFree(Aj));
6703     Aj = Aj_new;
6704   }
6705 
6706   if (Bnnz < Bnnz1 + Bnnz2) {
6707     PetscInt *Bj_new;
6708     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6709     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6710     PetscCall(PetscFree(Bj));
6711     Bj = Bj_new;
6712   }
6713 
6714   /* Create new submatrices for on-process and off-process coupling                  */
6715   PetscScalar     *Aa, *Ba;
6716   MatType          rtype;
6717   Mat_SeqAIJ      *a, *b;
6718   PetscObjectState state;
6719   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6720   PetscCall(PetscCalloc1(Bnnz, &Ba));
6721   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6722   if (cstart) {
6723     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6724   }
6725 
6726   PetscCall(MatGetRootType_Private(mat, &rtype));
6727 
6728   MatSeqXAIJGetOptions_Private(mpiaij->A);
6729   PetscCall(MatDestroy(&mpiaij->A));
6730   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6731   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6732   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6733 
6734   MatSeqXAIJGetOptions_Private(mpiaij->B);
6735   PetscCall(MatDestroy(&mpiaij->B));
6736   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6737   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6738   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6739 
6740   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6741   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6742   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6743   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6744 
6745   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6746   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6747   a->free_a  = PETSC_TRUE;
6748   a->free_ij = PETSC_TRUE;
6749   b->free_a  = PETSC_TRUE;
6750   b->free_ij = PETSC_TRUE;
6751   a->maxnz   = a->nz;
6752   b->maxnz   = b->nz;
6753 
6754   /* conversion must happen AFTER multiply setup */
6755   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6756   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6757   PetscCall(VecDestroy(&mpiaij->lvec));
6758   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6759 
6760   // Put the COO struct in a container and then attach that to the matrix
6761   PetscCall(PetscMalloc1(1, &coo));
6762   coo->n       = coo_n;
6763   coo->sf      = sf2;
6764   coo->sendlen = nleaves;
6765   coo->recvlen = nroots;
6766   coo->Annz    = Annz;
6767   coo->Bnnz    = Bnnz;
6768   coo->Annz2   = Annz2;
6769   coo->Bnnz2   = Bnnz2;
6770   coo->Atot1   = Atot1;
6771   coo->Atot2   = Atot2;
6772   coo->Btot1   = Btot1;
6773   coo->Btot2   = Btot2;
6774   coo->Ajmap1  = Ajmap1;
6775   coo->Aperm1  = Aperm1;
6776   coo->Bjmap1  = Bjmap1;
6777   coo->Bperm1  = Bperm1;
6778   coo->Aimap2  = Aimap2;
6779   coo->Ajmap2  = Ajmap2;
6780   coo->Aperm2  = Aperm2;
6781   coo->Bimap2  = Bimap2;
6782   coo->Bjmap2  = Bjmap2;
6783   coo->Bperm2  = Bperm2;
6784   coo->Cperm1  = Cperm1;
6785   // Allocate in preallocation. If not used, it has zero cost on host
6786   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6787   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6788   PetscCall(PetscContainerSetPointer(container, coo));
6789   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6790   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6791   PetscCall(PetscContainerDestroy(&container));
6792   PetscFunctionReturn(PETSC_SUCCESS);
6793 }
6794 
6795 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6796 {
6797   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6798   Mat                  A = mpiaij->A, B = mpiaij->B;
6799   PetscScalar         *Aa, *Ba;
6800   PetscScalar         *sendbuf, *recvbuf;
6801   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6802   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6803   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6804   const PetscCount    *Cperm1;
6805   PetscContainer       container;
6806   MatCOOStruct_MPIAIJ *coo;
6807 
6808   PetscFunctionBegin;
6809   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6810   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6811   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6812   sendbuf = coo->sendbuf;
6813   recvbuf = coo->recvbuf;
6814   Ajmap1  = coo->Ajmap1;
6815   Ajmap2  = coo->Ajmap2;
6816   Aimap2  = coo->Aimap2;
6817   Bjmap1  = coo->Bjmap1;
6818   Bjmap2  = coo->Bjmap2;
6819   Bimap2  = coo->Bimap2;
6820   Aperm1  = coo->Aperm1;
6821   Aperm2  = coo->Aperm2;
6822   Bperm1  = coo->Bperm1;
6823   Bperm2  = coo->Bperm2;
6824   Cperm1  = coo->Cperm1;
6825 
6826   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6827   PetscCall(MatSeqAIJGetArray(B, &Ba));
6828 
6829   /* Pack entries to be sent to remote */
6830   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6831 
6832   /* Send remote entries to their owner and overlap the communication with local computation */
6833   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6834   /* Add local entries to A and B */
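  /* e.g. (illustrative): if Ajmap1 = [0,2,3], unique nonzero 0 of A accumulates v[Aperm1[0]] + v[Aperm1[1]],
     and unique nonzero 1 accumulates v[Aperm1[2]] */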
6835   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6836     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6837     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6838     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6839   }
6840   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6841     PetscScalar sum = 0.0;
6842     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6843     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6844   }
6845   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6846 
6847   /* Add received remote entries to A and B */
6848   for (PetscCount i = 0; i < coo->Annz2; i++) {
6849     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6850   }
6851   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6852     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6853   }
6854   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6855   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6856   PetscFunctionReturn(PETSC_SUCCESS);
6857 }
6858 
6859 /*MC
6860    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6861 
6862    Options Database Keys:
6863 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6864 
6865    Level: beginner
6866 
6867    Notes:
6868    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
6869     in this case the values associated with the rows and columns one passes in are set to zero
6870     in the matrix
6871 
6872     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6873     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6874 
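   Example (a minimal creation sequence; the preallocation numbers are illustrative):
.vb
   PetscCall(MatCreate(comm, &A));
   PetscCall(MatSetSizes(A, m, n, M, N));
   PetscCall(MatSetType(A, MATMPIAIJ));
   PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
.ve
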
6875 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6876 M*/
6877 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6878 {
6879   Mat_MPIAIJ *b;
6880   PetscMPIInt size;
6881 
6882   PetscFunctionBegin;
6883   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6884 
6885   PetscCall(PetscNew(&b));
6886   B->data       = (void *)b;
6887   B->ops[0]     = MatOps_Values;
6888   B->assembled  = PETSC_FALSE;
6889   B->insertmode = NOT_SET_VALUES;
6890   b->size       = size;
6891 
6892   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6893 
6894   /* build cache for off array entries formed */
6895   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6896 
6897   b->donotstash  = PETSC_FALSE;
6898   b->colmap      = NULL;
6899   b->garray      = NULL;
6900   b->roworiented = PETSC_TRUE;
6901 
6902   /* stuff used for matrix vector multiply */
6903   b->lvec  = NULL;
6904   b->Mvctx = NULL;
6905 
6906   /* stuff for MatGetRow() */
6907   b->rowindices   = NULL;
6908   b->rowvalues    = NULL;
6909   b->getrowactive = PETSC_FALSE;
6910 
6911   /* flexible pointer used in CUSPARSE classes */
6912   b->spptr = NULL;
6913 
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6925 #if defined(PETSC_HAVE_CUDA)
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6927 #endif
6928 #if defined(PETSC_HAVE_HIP)
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6930 #endif
6931 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6933 #endif
6934 #if defined(PETSC_HAVE_MKL_SPARSE)
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6936 #endif
6937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6940   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6941 #if defined(PETSC_HAVE_ELEMENTAL)
6942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6943 #endif
6944 #if defined(PETSC_HAVE_SCALAPACK)
6945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6946 #endif
6947   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6948   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6949 #if defined(PETSC_HAVE_HYPRE)
6950   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6951   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6952 #endif
6953   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6954   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6955   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6956   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6957   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6958   PetscFunctionReturn(PETSC_SUCCESS);
6959 }
6960 
6961 /*@
6962   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6963   and "off-diagonal" part of the matrix in CSR format.
6964 
6965   Collective
6966 
6967   Input Parameters:
6968 + comm - MPI communicator
6969 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6970 . n    - This value should be the same as the local size used in creating the
6971          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have
6972          it calculated if `N` is given). For square matrices `n` is almost always `m`.
6973 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6974 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6975 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6976 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6977 . a    - matrix values
6978 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6979 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6980 - oa   - matrix values
6981 
6982   Output Parameter:
6983 . mat - the matrix
6984 
6985   Level: advanced
6986 
6987   Notes:
6988   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6989   must free the arrays once the matrix has been destroyed and not before.
6990 
6991   The `i` and `j` indices are 0 based
6992 
6993   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6994 
6995   This sets local rows and cannot be used to set off-processor values.
6996 
6997   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6998   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6999   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
7000   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
7001   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
7002   communication if it is known that only local entries will be set.
7003 
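   Example (a hypothetical sketch for rank 0 of two ranks, owning rows 0-1 and columns 0-1 of a 4x4 matrix):
.vb
   /* diagonal block: (0,0) = 1.0 and (1,1) = 2.0, with local column indices */
   PetscInt    i[]  = {0, 1, 2}, j[]  = {0, 1};
   PetscScalar a[]  = {1.0, 2.0};
   /* off-diagonal block: (0,2) = 5.0, with a global column index */
   PetscInt    oi[] = {0, 1, 1}, oj[] = {2};
   PetscScalar oa[] = {5.0};
   PetscCall(MatCreateMPIAIJWithSplitArrays(comm, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
.ve
   (As noted above, the arrays must remain valid until the matrix is destroyed.)
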
7004 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
7005           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
7006 @*/
7007 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
7008 {
7009   Mat_MPIAIJ *maij;
7010 
7011   PetscFunctionBegin;
7012   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
7013   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
7014   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
7015   PetscCall(MatCreate(comm, mat));
7016   PetscCall(MatSetSizes(*mat, m, n, M, N));
7017   PetscCall(MatSetType(*mat, MATMPIAIJ));
7018   maij = (Mat_MPIAIJ *)(*mat)->data;
7019 
7020   (*mat)->preallocated = PETSC_TRUE;
7021 
7022   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7023   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7024 
7025   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7026   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7027 
7028   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7029   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7030   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7031   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7032   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7033   PetscFunctionReturn(PETSC_SUCCESS);
7034 }
7035 
7036 typedef struct {
7037   Mat       *mp;    /* intermediate products */
7038   PetscBool *mptmp; /* is the intermediate product temporary? */
7039   PetscInt   cp;    /* number of intermediate products */
7040 
7041   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7042   PetscInt    *startsj_s, *startsj_r;
7043   PetscScalar *bufa;
7044   Mat          P_oth;
7045 
7046   /* may take advantage of merging product->B */
7047   Mat Bloc; /* B-local by merging diag and off-diag */
7048 
7049   /* cusparse does not support splitting the symbolic and numeric phases.
7050      When api_user is true, we don't need to update the numerical values
7051      of the temporary storage */
7052   PetscBool reusesym;
7053 
7054   /* support for COO values insertion */
7055   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars; used as MPI recv/send buffers respectively */
7056   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7057   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7058   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7059   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7060   PetscMemType mtype;
7061 
7062   /* customization */
7063   PetscBool abmerge;
7064   PetscBool P_oth_bind;
7065 } MatMatMPIAIJBACKEND;
7066 
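/* An illustrative instance of the struct above: in the MATPRODUCT_PtAP branch of
   MatProductSymbolic_MPIAIJBACKEND() below, with a nonempty P_oth, the intermediate products are
     mp[0] = PtAP(A_diag, Bloc)  = Bloc^T * A_diag * Bloc
     mp[1] = AB(A_off, P_oth)    = A_off * P_oth            (temporary: mptmp[1] = PETSC_TRUE)
     mp[2] = AtB(Bloc, mp[1])    = Bloc^T * A_off * P_oth
   where Bloc merges P's diagonal and off-diagonal blocks; mp[0] and mp[2] are then scattered into C
   through the COO row/column maps. */
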
7067 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7068 {
7069   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7070   PetscInt             i;
7071 
7072   PetscFunctionBegin;
7073   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7074   PetscCall(PetscFree(mmdata->bufa));
7075   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7076   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7077   PetscCall(MatDestroy(&mmdata->P_oth));
7078   PetscCall(MatDestroy(&mmdata->Bloc));
7079   PetscCall(PetscSFDestroy(&mmdata->sf));
7080   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7081   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7082   PetscCall(PetscFree(mmdata->own[0]));
7083   PetscCall(PetscFree(mmdata->own));
7084   PetscCall(PetscFree(mmdata->off[0]));
7085   PetscCall(PetscFree(mmdata->off));
7086   PetscCall(PetscFree(mmdata));
7087   PetscFunctionReturn(PETSC_SUCCESS);
7088 }
7089 
7090 /* Copy selected n entries with indices in idx[] of A to v[].
7091    If idx is NULL, copy the whole data array of A to v[]
7092  */
7093 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7094 {
7095   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7096 
7097   PetscFunctionBegin;
7098   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7099   if (f) {
7100     PetscCall((*f)(A, n, idx, v));
7101   } else {
7102     const PetscScalar *vv;
7103 
7104     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7105     if (n && idx) {
7106       PetscScalar    *w  = v;
7107       const PetscInt *oi = idx;
7108       PetscInt        j;
7109 
7110       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7111     } else {
7112       PetscCall(PetscArraycpy(v, vv, n));
7113     }
7114     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7115   }
7116   PetscFunctionReturn(PETSC_SUCCESS);
7117 }
7118 
7119 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7120 {
7121   MatMatMPIAIJBACKEND *mmdata;
7122   PetscInt             i, n_d, n_o;
7123 
7124   PetscFunctionBegin;
7125   MatCheckProduct(C, 1);
7126   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7127   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7128   if (!mmdata->reusesym) { /* update temporary matrices */
7129     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7130     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7131   }
7132   mmdata->reusesym = PETSC_FALSE;
7133 
7134   for (i = 0; i < mmdata->cp; i++) {
7135     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7136     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7137   }
7138   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7139     PetscInt noff;
7140 
7141     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7142     if (mmdata->mptmp[i]) continue;
7143     if (noff) {
7144       PetscInt nown;
7145 
7146       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7147       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7148       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7149       n_o += noff;
7150       n_d += nown;
7151     } else {
7152       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7153 
7154       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7155       n_d += mm->nz;
7156     }
7157   }
7158   if (mmdata->hasoffproc) { /* offprocess insertion */
7159     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7160     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7161   }
7162   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7163   PetscFunctionReturn(PETSC_SUCCESS);
7164 }
7165 
7166 /* Support for Pt * A, A * P, or Pt * A * P */
7167 #define MAX_NUMBER_INTERMEDIATE 4
7168 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7169 {
7170   Mat_Product           *product = C->product;
7171   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7172   Mat_MPIAIJ            *a, *p;
7173   MatMatMPIAIJBACKEND   *mmdata;
7174   ISLocalToGlobalMapping P_oth_l2g = NULL;
7175   IS                     glob      = NULL;
7176   const char            *prefix;
7177   char                   pprefix[256];
7178   const PetscInt        *globidx, *P_oth_idx;
7179   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7180   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7181   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7182                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7183                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7184   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
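  /* e.g., in the unmerged MATPRODUCT_AB branch below, A_diag * P_diag uses rmapt = cmapt = 1 (local
     indices plus a base offset), while A_diag * P_off uses cmapt = 2 with cmapa = p->garray to map its
     columns to global indices */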
7185 
7186   MatProductType ptype;
7187   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7188   PetscMPIInt    size;
7189 
7190   PetscFunctionBegin;
7191   MatCheckProduct(C, 1);
7192   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7193   ptype = product->type;
7194   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7195     ptype                                          = MATPRODUCT_AB;
7196     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7197   }
7198   switch (ptype) {
7199   case MATPRODUCT_AB:
7200     A          = product->A;
7201     P          = product->B;
7202     m          = A->rmap->n;
7203     n          = P->cmap->n;
7204     M          = A->rmap->N;
7205     N          = P->cmap->N;
7206     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7207     break;
7208   case MATPRODUCT_AtB:
7209     P          = product->A;
7210     A          = product->B;
7211     m          = P->cmap->n;
7212     n          = A->cmap->n;
7213     M          = P->cmap->N;
7214     N          = A->cmap->N;
7215     hasoffproc = PETSC_TRUE;
7216     break;
7217   case MATPRODUCT_PtAP:
7218     A          = product->A;
7219     P          = product->B;
7220     m          = P->cmap->n;
7221     n          = P->cmap->n;
7222     M          = P->cmap->N;
7223     N          = P->cmap->N;
7224     hasoffproc = PETSC_TRUE;
7225     break;
7226   default:
7227     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7228   }
7229   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7230   if (size == 1) hasoffproc = PETSC_FALSE;
7231 
7232   /* defaults */
7233   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7234     mp[i]    = NULL;
7235     mptmp[i] = PETSC_FALSE;
7236     rmapt[i] = -1;
7237     cmapt[i] = -1;
7238     rmapa[i] = NULL;
7239     cmapa[i] = NULL;
7240   }
7241 
7242   /* customization */
7243   PetscCall(PetscNew(&mmdata));
7244   mmdata->reusesym = product->api_user;
7245   if (ptype == MATPRODUCT_AB) {
7246     if (product->api_user) {
7247       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7248       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7249       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7250       PetscOptionsEnd();
7251     } else {
7252       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7253       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7254       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7255       PetscOptionsEnd();
7256     }
7257   } else if (ptype == MATPRODUCT_PtAP) {
7258     if (product->api_user) {
7259       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7260       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7261       PetscOptionsEnd();
7262     } else {
7263       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7264       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7265       PetscOptionsEnd();
7266     }
7267   }
7268   a = (Mat_MPIAIJ *)A->data;
7269   p = (Mat_MPIAIJ *)P->data;
7270   PetscCall(MatSetSizes(C, m, n, M, N));
7271   PetscCall(PetscLayoutSetUp(C->rmap));
7272   PetscCall(PetscLayoutSetUp(C->cmap));
7273   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7274   PetscCall(MatGetOptionsPrefix(C, &prefix));
7275 
7276   cp = 0;
7277   switch (ptype) {
7278   case MATPRODUCT_AB: /* A * P */
7279     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7280 
7281     /* A_diag * P_local (merged or not) */
7282     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7283       /* P is product->B */
7284       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7285       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7286       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7287       PetscCall(MatProductSetFill(mp[cp], product->fill));
7288       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7289       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7290       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7291       mp[cp]->product->api_user = product->api_user;
7292       PetscCall(MatProductSetFromOptions(mp[cp]));
7293       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7294       PetscCall(ISGetIndices(glob, &globidx));
7295       rmapt[cp] = 1;
7296       cmapt[cp] = 2;
7297       cmapa[cp] = globidx;
7298       mptmp[cp] = PETSC_FALSE;
7299       cp++;
7300     } else { /* A_diag * P_diag and A_diag * P_off */
7301       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7302       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7303       PetscCall(MatProductSetFill(mp[cp], product->fill));
7304       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7305       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7306       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7307       mp[cp]->product->api_user = product->api_user;
7308       PetscCall(MatProductSetFromOptions(mp[cp]));
7309       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7310       rmapt[cp] = 1;
7311       cmapt[cp] = 1;
7312       mptmp[cp] = PETSC_FALSE;
7313       cp++;
7314       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7315       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7316       PetscCall(MatProductSetFill(mp[cp], product->fill));
7317       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7318       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7319       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7320       mp[cp]->product->api_user = product->api_user;
7321       PetscCall(MatProductSetFromOptions(mp[cp]));
7322       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7323       rmapt[cp] = 1;
7324       cmapt[cp] = 2;
7325       cmapa[cp] = p->garray;
7326       mptmp[cp] = PETSC_FALSE;
7327       cp++;
7328     }
7329 
7330     /* A_off * P_other */
7331     if (mmdata->P_oth) {
7332       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7333       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7334       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7335       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7336       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7337       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7338       PetscCall(MatProductSetFill(mp[cp], product->fill));
7339       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7340       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7341       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7342       mp[cp]->product->api_user = product->api_user;
7343       PetscCall(MatProductSetFromOptions(mp[cp]));
7344       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7345       rmapt[cp] = 1;
7346       cmapt[cp] = 2;
7347       cmapa[cp] = P_oth_idx;
7348       mptmp[cp] = PETSC_FALSE;
7349       cp++;
7350     }
7351     break;
7352 
7353   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7354     /* A is product->B */
7355     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7356     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7357       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7358       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7359       PetscCall(MatProductSetFill(mp[cp], product->fill));
7360       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7361       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7362       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7363       mp[cp]->product->api_user = product->api_user;
7364       PetscCall(MatProductSetFromOptions(mp[cp]));
7365       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7366       PetscCall(ISGetIndices(glob, &globidx));
7367       rmapt[cp] = 2;
7368       rmapa[cp] = globidx;
7369       cmapt[cp] = 2;
7370       cmapa[cp] = globidx;
7371       mptmp[cp] = PETSC_FALSE;
7372       cp++;
7373     } else {
7374       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7375       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7376       PetscCall(MatProductSetFill(mp[cp], product->fill));
7377       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7378       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7379       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7380       mp[cp]->product->api_user = product->api_user;
7381       PetscCall(MatProductSetFromOptions(mp[cp]));
7382       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7383       PetscCall(ISGetIndices(glob, &globidx));
7384       rmapt[cp] = 1;
7385       cmapt[cp] = 2;
7386       cmapa[cp] = globidx;
7387       mptmp[cp] = PETSC_FALSE;
7388       cp++;
7389       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7390       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7391       PetscCall(MatProductSetFill(mp[cp], product->fill));
7392       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7393       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7394       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7395       mp[cp]->product->api_user = product->api_user;
7396       PetscCall(MatProductSetFromOptions(mp[cp]));
7397       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7398       rmapt[cp] = 2;
7399       rmapa[cp] = p->garray;
7400       cmapt[cp] = 2;
7401       cmapa[cp] = globidx;
7402       mptmp[cp] = PETSC_FALSE;
7403       cp++;
7404     }
7405     break;
7406   case MATPRODUCT_PtAP:
7407     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7408     /* P is product->B */
7409     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7410     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7411     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7412     PetscCall(MatProductSetFill(mp[cp], product->fill));
7413     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7414     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7415     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7416     mp[cp]->product->api_user = product->api_user;
7417     PetscCall(MatProductSetFromOptions(mp[cp]));
7418     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7419     PetscCall(ISGetIndices(glob, &globidx));
7420     rmapt[cp] = 2;
7421     rmapa[cp] = globidx;
7422     cmapt[cp] = 2;
7423     cmapa[cp] = globidx;
7424     mptmp[cp] = PETSC_FALSE;
7425     cp++;
7426     if (mmdata->P_oth) {
7427       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7428       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7429       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7430       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7431       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7432       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7433       PetscCall(MatProductSetFill(mp[cp], product->fill));
7434       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7435       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7436       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7437       mp[cp]->product->api_user = product->api_user;
7438       PetscCall(MatProductSetFromOptions(mp[cp]));
7439       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7440       mptmp[cp] = PETSC_TRUE;
7441       cp++;
7442       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7443       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7444       PetscCall(MatProductSetFill(mp[cp], product->fill));
7445       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7446       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7447       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7448       mp[cp]->product->api_user = product->api_user;
7449       PetscCall(MatProductSetFromOptions(mp[cp]));
7450       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7451       rmapt[cp] = 2;
7452       rmapa[cp] = globidx;
7453       cmapt[cp] = 2;
7454       cmapa[cp] = P_oth_idx;
7455       mptmp[cp] = PETSC_FALSE;
7456       cp++;
7457     }
7458     break;
7459   default:
7460     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7461   }
7462   /* sanity check */
7463   if (size > 1)
7464     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7465 
7466   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7467   for (i = 0; i < cp; i++) {
7468     mmdata->mp[i]    = mp[i];
7469     mmdata->mptmp[i] = mptmp[i];
7470   }
7471   mmdata->cp             = cp;
7472   C->product->data       = mmdata;
7473   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7474   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7475 
7476   /* memory type */
7477   mmdata->mtype = PETSC_MEMTYPE_HOST;
7478   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7479   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7480   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7481   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7482   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7483   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
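       /* Ex.: if C is of type MATMPIAIJCUSPARSE, mtype becomes PETSC_MEMTYPE_CUDA, so the value
          buffers coo_w/coo_v allocated below through PetscSFMalloc() live in device memory */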
7484 
7485   /* prepare coo coordinates for values insertion */
7486 
7487   /* count total nonzeros of those intermediate seqaij Mats
7488     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7489     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7490     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7491   */
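       /* Ex. (illustrative): suppose this proc owns global rows [0,10) and mp[p] has a sparse row map
          (rmapt[p] == 2) with rmapa[p] = {3, 17}; the nonzeros of the row mapping to 3 count toward
          ncoo_oown (owned) and those of the row mapping to 17 toward ncoo_o (offproc), while a matrix
          with rmapt[p] == 1 contributes all of its nonzeros to ncoo_d */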
7492   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7493     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7494     if (mptmp[cp]) continue;
7495     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to their owning processes (might include self) */
7496       const PetscInt *rmap = rmapa[cp];
7497       const PetscInt  mr   = mp[cp]->rmap->n;
7498       const PetscInt  rs   = C->rmap->rstart;
7499       const PetscInt  re   = C->rmap->rend;
7500       const PetscInt *ii   = mm->i;
7501       for (i = 0; i < mr; i++) {
7502         const PetscInt gr = rmap[i];
7503         const PetscInt nz = ii[i + 1] - ii[i];
7504         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7505         else ncoo_oown += nz;                  /* this row is local */
7506       }
7507     } else ncoo_d += mm->nz;
7508   }
7509 
7510   /*
7511     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7512 
7513     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7514 
7515     off[0] points to a big index array shared by off[1,2,...]; similarly for own[0].
7516 
7517     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other procs
7518     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7519     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7520 
7521     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7522     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7523   */
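       /* Ex. (illustrative layout): with cp = 2, off[0] is the base of a single allocation of length
          ncoo_o, and off[1], off[2] are set in the loop below to the ends of the segments filled for
          mp[0] and mp[1], so the segment of mp[p] is [off[p], off[p+1]). The (i,j) gathered through
          mmdata->sf land after the first ncoo_d + ncoo_oown local entries of coo_i[]/coo_j[] */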
7524   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7525   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7526 
7527   /* gather (i,j) of nonzeros inserted by remote procs */
7528   if (hasoffproc) {
7529     PetscSF  msf;
7530     PetscInt ncoo2, *coo_i2, *coo_j2;
7531 
7532     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7533     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7534     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7535 
7536     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7537       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7538       PetscInt   *idxoff = mmdata->off[cp];
7539       PetscInt   *idxown = mmdata->own[cp];
7540       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7541         const PetscInt *rmap = rmapa[cp];
7542         const PetscInt *cmap = cmapa[cp];
7543         const PetscInt *ii   = mm->i;
7544         PetscInt       *coi  = coo_i + ncoo_o;
7545         PetscInt       *coj  = coo_j + ncoo_o;
7546         const PetscInt  mr   = mp[cp]->rmap->n;
7547         const PetscInt  rs   = C->rmap->rstart;
7548         const PetscInt  re   = C->rmap->rend;
7549         const PetscInt  cs   = C->cmap->rstart;
7550         for (i = 0; i < mr; i++) {
7551           const PetscInt *jj = mm->j + ii[i];
7552           const PetscInt  gr = rmap[i];
7553           const PetscInt  nz = ii[i + 1] - ii[i];
7554           if (gr < rs || gr >= re) { /* this is an offproc row */
7555             for (j = ii[i]; j < ii[i + 1]; j++) {
7556               *coi++    = gr;
7557               *idxoff++ = j;
7558             }
7559             if (!cmapt[cp]) { /* already global */
7560               for (j = 0; j < nz; j++) *coj++ = jj[j];
7561             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7562               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7563             } else { /* offdiag */
7564               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7565             }
7566             ncoo_o += nz;
7567           } else { /* this is a local row */
7568             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7569           }
7570         }
7571       }
7572       mmdata->off[cp + 1] = idxoff;
7573       mmdata->own[cp + 1] = idxown;
7574     }
7575 
7576     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7577     PetscInt incoo_o;
7578     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7579     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7580     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7581     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7582     ncoo = ncoo_d + ncoo_oown + ncoo2;
7583     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7584     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7585     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7586     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7587     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7588     PetscCall(PetscFree2(coo_i, coo_j));
7589     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7590     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7591     coo_i = coo_i2;
7592     coo_j = coo_j2;
7593   } else { /* no offproc values insertion */
7594     ncoo = ncoo_d;
7595     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7596 
7597     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7598     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7599     PetscCall(PetscSFSetUp(mmdata->sf));
7600   }
7601   mmdata->hasoffproc = hasoffproc;
7602 
7603   /* gather (i,j) of nonzeros inserted locally */
7604   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7605     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7606     PetscInt       *coi  = coo_i + ncoo_d;
7607     PetscInt       *coj  = coo_j + ncoo_d;
7608     const PetscInt *jj   = mm->j;
7609     const PetscInt *ii   = mm->i;
7610     const PetscInt *cmap = cmapa[cp];
7611     const PetscInt *rmap = rmapa[cp];
7612     const PetscInt  mr   = mp[cp]->rmap->n;
7613     const PetscInt  rs   = C->rmap->rstart;
7614     const PetscInt  re   = C->rmap->rend;
7615     const PetscInt  cs   = C->cmap->rstart;
7616 
7617     if (mptmp[cp]) continue;
7618     if (rmapt[cp] == 1) { /* consecutive rows */
7619       /* fill coo_i */
7620       for (i = 0; i < mr; i++) {
7621         const PetscInt gr = i + rs;
7622         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7623       }
7624       /* fill coo_j */
7625       if (!cmapt[cp]) { /* type-0, already global */
7626         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7627       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7628         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7629       } else {                                            /* type-2, local to global for sparse columns */
7630         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7631       }
7632       ncoo_d += mm->nz;
7633     } else if (rmapt[cp] == 2) { /* sparse rows */
7634       for (i = 0; i < mr; i++) {
7635         const PetscInt *jj = mm->j + ii[i];
7636         const PetscInt  gr = rmap[i];
7637         const PetscInt  nz = ii[i + 1] - ii[i];
7638         if (gr >= rs && gr < re) { /* local rows */
7639           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7640           if (!cmapt[cp]) { /* type-0, already global */
7641             for (j = 0; j < nz; j++) *coj++ = jj[j];
7642           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7643             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7644           } else { /* type-2, local to global for sparse columns */
7645             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7646           }
7647           ncoo_d += nz;
7648         }
7649       }
7650     }
7651   }
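       /* Ex. (illustrative): for cmapt type-1 with cs = 100, local column 7 maps to global column 107;
          for type-2 with cmap = {5, 42, 88}, local column 1 maps to global column 42; type-0 columns
          already hold global indices and are copied verbatim */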
7652   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7653   PetscCall(ISDestroy(&glob));
7654   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7655   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7656   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7657   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7658 
7659   /* preallocate with COO data */
7660   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7661   PetscCall(PetscFree2(coo_i, coo_j));
7662   PetscFunctionReturn(PETSC_SUCCESS);
7663 }
7664 
7665 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7666 {
7667   Mat_Product *product = mat->product;
7668 #if defined(PETSC_HAVE_DEVICE)
7669   PetscBool match  = PETSC_FALSE;
7670   PetscBool usecpu = PETSC_FALSE;
7671 #else
7672   PetscBool match = PETSC_TRUE;
7673 #endif
7674 
7675   PetscFunctionBegin;
7676   MatCheckProduct(mat, 1);
7677 #if defined(PETSC_HAVE_DEVICE)
7678   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7679   if (match) { /* we can always fallback to the CPU if requested */
7680     switch (product->type) {
7681     case MATPRODUCT_AB:
7682       if (product->api_user) {
7683         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7684         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7685         PetscOptionsEnd();
7686       } else {
7687         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7688         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7689         PetscOptionsEnd();
7690       }
7691       break;
7692     case MATPRODUCT_AtB:
7693       if (product->api_user) {
7694         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7695         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7696         PetscOptionsEnd();
7697       } else {
7698         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7699         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7700         PetscOptionsEnd();
7701       }
7702       break;
7703     case MATPRODUCT_PtAP:
7704       if (product->api_user) {
7705         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7706         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7707         PetscOptionsEnd();
7708       } else {
7709         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7710         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7711         PetscOptionsEnd();
7712       }
7713       break;
7714     default:
7715       break;
7716     }
7717     match = (PetscBool)!usecpu;
7718   }
7719 #endif
7720   if (match) {
7721     switch (product->type) {
7722     case MATPRODUCT_AB:
7723     case MATPRODUCT_AtB:
7724     case MATPRODUCT_PtAP:
7725       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7726       break;
7727     default:
7728       break;
7729     }
7730   }
7731   /* fallback to MPIAIJ ops */
7732   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7733   PetscFunctionReturn(PETSC_SUCCESS);
7734 }
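     /*
       Ex. (usage sketch): with device matrices the backend symbolic/numeric phases above are selected
       by default; to fall back to the CPU implementation for C = A*B requested through MatMatMult(),
       run with
     
         -matmatmult_backend_cpu
     
       while products requested through the generic MatProduct interface respond to
     
         -mat_product_algorithm_backend_cpu
     */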
7735 
7736 /*
7737    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7738 
7739    n - the number of block indices in cc[]
7740    cc - the block indices (must be large enough to contain the indices)
7741 */
7742 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7743 {
7744   PetscInt        cnt = -1, nidx, j;
7745   const PetscInt *idx;
7746 
7747   PetscFunctionBegin;
7748   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7749   if (nidx) {
7750     cnt     = 0;
7751     cc[cnt] = idx[0] / bs;
7752     for (j = 1; j < nidx; j++) {
7753       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7754     }
7755   }
7756   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7757   *n = cnt + 1;
7758   PetscFunctionReturn(PETSC_SUCCESS);
7759 }
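     /*
       Ex. (worked example): with bs = 2 and a row whose column indices are idx[] = {0, 1, 4, 5, 7},
       idx[j] / bs gives {0, 0, 2, 2, 3}, so the collapsed block indices are cc[] = {0, 2, 3} and *n = 3.
       The loop relies on idx[] being sorted in ascending order, as MatGetRow() provides for AIJ matrices.
     */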
7760 
7761 /*
7762     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7763 
7764     ncollapsed - the number of block indices
7765     collapsed - the block indices, returned as an alias of one of the work arrays w0/w2 (which must be large enough to contain the indices)
7766 */
7767 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7768 {
7769   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7770 
7771   PetscFunctionBegin;
7772   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7773   for (i = start + 1; i < start + bs; i++) {
7774     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7775     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7776     cprevtmp = cprev;
7777     cprev    = merged;
7778     merged   = cprevtmp;
7779   }
7780   *ncollapsed = nprev;
7781   if (collapsed) *collapsed = cprev;
7782   PetscFunctionReturn(PETSC_SUCCESS);
7783 }
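     /*
       Ex. (worked example): with bs = 2 and start = 4, rows 4 and 5 are collapsed individually and
       their block indices merged with PetscMergeIntArray(); if row 4 yields {0, 2} and row 5 yields
       {2, 3}, the result is {0, 2, 3} with *ncollapsed = 3. Since *collapsed aliases one of the work
       arrays w0/w2, the caller must not free it separately.
     */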
7784 
7785 /*
7786  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7787 
7788  Input Parameters:
7789 + Amat - matrix
7790 . symmetrize - make the result symmetric
7791 . scale - symmetrically scale with the diagonal
7792 . filter - remove entries with magnitude below this tolerance (negative keeps all entries)
7793 . index_size - size of index[] (0 means use all dofs of each block)
7794 - index - the dof indices within each block used to compute the block norms
7795  Output Parameter:
7796 . a_Gmat - output scalar graph with entries >= 0
     */
7797 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7798 {
7799   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7800   MPI_Comm  comm;
7801   Mat       Gmat;
7802   PetscBool ismpiaij, isseqaij;
7803   Mat       a, b, c;
7804   MatType   jtype;
7805 
7806   PetscFunctionBegin;
7807   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7808   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7809   PetscCall(MatGetSize(Amat, &MM, &NN));
7810   PetscCall(MatGetBlockSize(Amat, &bs));
7811   nloc = (Iend - Istart) / bs;
7812 
7813   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7814   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7815   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7816 
7817   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7818   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class could provide a fast
7819      implementation */
7820   if (bs > 1) {
7821     PetscCall(MatGetType(Amat, &jtype));
7822     PetscCall(MatCreate(comm, &Gmat));
7823     PetscCall(MatSetType(Gmat, jtype));
7824     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7825     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7826     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7827       PetscInt  *d_nnz, *o_nnz;
7828       MatScalar *aa, val, *AA;
7829       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7830 
7831       if (isseqaij) {
7832         a = Amat;
7833         b = NULL;
7834       } else {
7835         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7836         a             = d->A;
7837         b             = d->B;
7838       }
7839       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7840       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7841       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7842         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7843         const PetscInt *cols1, *cols2;
7844 
7845         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7846           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7847           nnz[brow / bs] = nc2 / bs;
7848           if (nc2 % bs) ok = 0;
7849           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7850           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7851             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7852             if (nc1 != nc2) ok = 0;
7853             else {
7854               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7855                 if (cols1[jj] != cols2[jj]) ok = 0;
7856                 if (cols1[jj] % bs != jj % bs) ok = 0;
7857               }
7858             }
7859             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7860           }
7861           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7862           if (!ok) {
7863             PetscCall(PetscFree2(d_nnz, o_nnz));
7864             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7865             goto old_bs;
7866           }
7867         }
7868       }
7869       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7870       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7871       PetscCall(PetscFree2(d_nnz, o_nnz));
7872       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7873       // diag
7874       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7875         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7876 
7877         ai = aseq->i;
7878         n  = ai[brow + 1] - ai[brow];
7879         aj = aseq->j + ai[brow];
7880         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7881           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7882           val        = 0;
7883           if (index_size == 0) {
7884             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7885               aa = aseq->a + ai[brow + ii] + k;
7886               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7887                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7888               }
7889             }
7890           } else {                                            // use (index,index) value if provided
7891             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7892               PetscInt ii = index[iii];
7893               aa          = aseq->a + ai[brow + ii] + k;
7894               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7895                 PetscInt jj = index[jjj];
7896                 val += PetscAbs(PetscRealPart(aa[jj]));
7897               }
7898             }
7899           }
7900           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7901           AA[k / bs] = val;
7902         }
7903         grow = Istart / bs + brow / bs;
7904         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7905       }
7906       // off-diag
7907       if (ismpiaij) {
7908         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7909         const PetscScalar *vals;
7910         const PetscInt    *cols, *garray = aij->garray;
7911 
7912         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray?");
7913         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7914           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7915           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7916             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7917             AA[k / bs] = 0;
7918             AJ[cidx]   = garray[cols[k]] / bs;
7919           }
7920           nc = ncols / bs;
7921           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7922           if (index_size == 0) {
7923             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7924               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7925               for (PetscInt k = 0; k < ncols; k += bs) {
7926                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7927                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7928                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7929                 }
7930               }
7931               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7932             }
7933           } else {                                            // use (index,index) value if provided
7934             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7935               PetscInt ii = index[iii];
7936               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7937               for (PetscInt k = 0; k < ncols; k += bs) {
7938                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7939                   PetscInt jj = index[jjj];
7940                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7941                 }
7942               }
7943               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7944             }
7945           }
7946           grow = Istart / bs + brow / bs;
7947           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7948         }
7949       }
7950       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7951       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7952       PetscCall(PetscFree2(AA, AJ));
7953     } else {
7954       const PetscScalar *vals;
7955       const PetscInt    *idx;
7956       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7957     old_bs:
7958       /*
7959        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7960        */
7961       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7962       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7963       if (isseqaij) {
7964         PetscInt max_d_nnz;
7965 
7966         /*
7967          Determine exact preallocation count for (sequential) scalar matrix
7968          */
7969         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7970         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7971         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7972         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7973         PetscCall(PetscFree3(w0, w1, w2));
7974       } else if (ismpiaij) {
7975         Mat             Daij, Oaij;
7976         const PetscInt *garray;
7977         PetscInt        max_d_nnz;
7978 
7979         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7980         /*
7981          Determine exact preallocation count for diagonal block portion of scalar matrix
7982          */
7983         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7984         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7985         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7986         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7987         PetscCall(PetscFree3(w0, w1, w2));
7988         /*
7989          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7990          */
7991         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7992           o_nnz[jj] = 0;
7993           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7994             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7995             o_nnz[jj] += ncols;
7996             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7997           }
7998           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7999         }
8000       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
8001       /* get scalar copy (norms) of matrix */
8002       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8003       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8004       PetscCall(PetscFree2(d_nnz, o_nnz));
8005       for (Ii = Istart; Ii < Iend; Ii++) {
8006         PetscInt dest_row = Ii / bs;
8007 
8008         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8009         for (jj = 0; jj < ncols; jj++) {
8010           PetscInt    dest_col = idx[jj] / bs;
8011           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8012 
8013           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8014         }
8015         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8016       }
8017       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8018       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8019     }
8020   } else {
8021     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8022     else {
8023       Gmat = Amat;
8024       PetscCall(PetscObjectReference((PetscObject)Gmat));
8025     }
8026     if (isseqaij) {
8027       a = Gmat;
8028       b = NULL;
8029     } else {
8030       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8031       a             = d->A;
8032       b             = d->B;
8033     }
8034     if (filter >= 0 || scale) {
8035       /* take absolute value of each entry */
8036       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8037         MatInfo      info;
8038         PetscScalar *avals;
8039 
8040         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8041         PetscCall(MatSeqAIJGetArray(c, &avals));
8042         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8043         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8044       }
8045     }
8046   }
8047   if (symmetrize) {
8048     PetscBool isset, issym;
8049 
8050     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8051     if (!isset || !issym) {
8052       Mat matTrans;
8053 
8054       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8055       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8056       PetscCall(MatDestroy(&matTrans));
8057     }
8058     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8059   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8060   if (scale) {
8061     /* symmetrically scale Gmat so that all diagonal values are +1 or -1 */
8062     Vec diag;
8063 
8064     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8065     PetscCall(MatGetDiagonal(Gmat, diag));
8066     PetscCall(VecReciprocal(diag));
8067     PetscCall(VecSqrtAbs(diag));
8068     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8069     PetscCall(VecDestroy(&diag));
8070   }
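       /* Ex. (what the scaling does): diag holds 1/sqrt(|d_i|) for diagonal entries d_i, so
          MatDiagonalScale() replaces G_ij by G_ij / sqrt(|d_i| |d_j|) and each diagonal entry
          becomes d_i / |d_i| = +1 or -1 */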
8071   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8072   if (filter >= 0) {
8073     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8074     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8075   }
8076   *a_Gmat = Gmat;
8077   PetscFunctionReturn(PETSC_SUCCESS);
8078 }
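     /*
       Ex. (minimal calling sketch; A is a hypothetical assembled blocked AIJ matrix): build the scalar
       connection graph, symmetrized and scaled, dropping entries below 0.01 and using all dofs of each
       block:
     
         Mat G;
         PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &G));
         // ... use G, e.g. to drive aggregation ...
         PetscCall(MatDestroy(&G));
     */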
8079 
8080 /*
8081     Special version for direct calls from Fortran
8082 */
8083 
8084 /* Change these macros so they can be used in a void function */
8085 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8086 #undef PetscCall
8087 #define PetscCall(...) \
8088   do { \
8089     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8090     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8091       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8092       return; \
8093     } \
8094   } while (0)
8095 
8096 #undef SETERRQ
8097 #define SETERRQ(comm, ierr, ...) \
8098   do { \
8099     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8100     return; \
8101   } while (0)
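     /*
       Ex. (illustrative): inside the void Fortran wrapper below, a failing call such as
       PetscCall(MatSeqAIJGetArray(A, &aa)) stores the error code in *_ierr and returns immediately,
       so the Fortran caller can test ierr just as it would for any other PETSc routine.
     */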
8102 
8103 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8104   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8105 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8106   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8107 #else
8108 #endif
8109 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8110 {
8111   Mat         mat = *mmat;
8112   PetscInt    m = *mm, n = *mn;
8113   InsertMode  addv = *maddv;
8114   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8115   PetscScalar value;
8116 
8117   MatCheckPreallocated(mat, 1);
8118   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8119   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8120   {
8121     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8122     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8123     PetscBool roworiented = aij->roworiented;
8124 
8125     /* Some Variables required in the macro */
8126     Mat         A     = aij->A;
8127     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8128     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8129     MatScalar  *aa;
8130     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8131     Mat         B                 = aij->B;
8132     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8133     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8134     MatScalar  *ba;
8135     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8136      * cannot use "#if defined" inside a macro. */
8137     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8138 
8139     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8140     PetscInt   nonew = a->nonew;
8141     MatScalar *ap1, *ap2;
8142 
8143     PetscFunctionBegin;
8144     PetscCall(MatSeqAIJGetArray(A, &aa));
8145     PetscCall(MatSeqAIJGetArray(B, &ba));
8146     for (i = 0; i < m; i++) {
8147       if (im[i] < 0) continue;
8148       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8149       if (im[i] >= rstart && im[i] < rend) {
8150         row      = im[i] - rstart;
8151         lastcol1 = -1;
8152         rp1      = aj + ai[row];
8153         ap1      = aa + ai[row];
8154         rmax1    = aimax[row];
8155         nrow1    = ailen[row];
8156         low1     = 0;
8157         high1    = nrow1;
8158         lastcol2 = -1;
8159         rp2      = bj + bi[row];
8160         ap2      = ba + bi[row];
8161         rmax2    = bimax[row];
8162         nrow2    = bilen[row];
8163         low2     = 0;
8164         high2    = nrow2;
8165 
8166         for (j = 0; j < n; j++) {
8167           if (roworiented) value = v[i * n + j];
8168           else value = v[i + j * m];
8169           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8170           if (in[j] >= cstart && in[j] < cend) {
8171             col = in[j] - cstart;
8172             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8173           } else if (in[j] < 0) continue;
8174           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8175             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8176           } else {
8177             if (mat->was_assembled) {
8178               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8179 #if defined(PETSC_USE_CTABLE)
8180               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8181               col--;
8182 #else
8183               col = aij->colmap[in[j]] - 1;
8184 #endif
8185               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8186                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8187                 col = in[j];
8188                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8189                 B        = aij->B;
8190                 b        = (Mat_SeqAIJ *)B->data;
8191                 bimax    = b->imax;
8192                 bi       = b->i;
8193                 bilen    = b->ilen;
8194                 bj       = b->j;
8195                 rp2      = bj + bi[row];
8196                 ap2      = ba + bi[row];
8197                 rmax2    = bimax[row];
8198                 nrow2    = bilen[row];
8199                 low2     = 0;
8200                 high2    = nrow2;
8201                 bm       = aij->B->rmap->n;
8202                 ba       = b->a;
8203                 inserted = PETSC_FALSE;
8204               }
8205             } else col = in[j];
8206             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8207           }
8208         }
8209       } else if (!aij->donotstash) {
8210         if (roworiented) {
8211           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8212         } else {
8213           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8214         }
8215       }
8216     }
8217     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8218     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8219   }
8220   PetscFunctionReturnVoid();
8221 }
8222 
8223 /* Undefining these here since they were redefined from their original definition above! No
8224  * other PETSc functions should be defined past this point, as it is impossible to recover the
8225  * original definitions */
8226 #undef PetscCall
8227 #undef SETERRQ
8228