xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d698cf032b9f026769ec2c47162caba78ed81a3e)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because they are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. This matrix type also automatically
150   switches over to use inodes when enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
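/* A minimal usage sketch (illustrative only, not code from this file): create an AIJ matrix and call both
   preallocation routines, as recommended above, so the same code works for one or many MPI processes.
   M, N and the per-row nonzero estimates below are hypothetical placeholders.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // used when the communicator has a single process
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // used when it has more than one
     // ... MatSetValues() calls ...
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/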
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
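/* A short sketch (assumed standard PETSc pattern) of selecting this type from the options database;
   A, M and N are hypothetical placeholders:

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
     PetscCall(MatSetFromOptions(A)); // honors -mat_type aijcrl, yielding MATSEQAIJCRL or MATMPIAIJCRL depending on the communicator size
*/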
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
382 a slightly higher hash-table cost; without it, it is not scalable (each processor
383 stores an order-N integer array) but access is fast.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
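/* A lookup sketch mirroring how colmap is consumed in MatSetValues_MPIAIJ() below; gcol and lcol are
   hypothetical names for a global column id and the corresponding local column of B:

   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;                       // values are stored shifted by one so that 0 can mean "not present"
   #else
     lcol = aij->colmap[gcol] - 1; // -1 when gcol is not a column of B
   #endif
*/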
401 
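/* Descriptive summary (added for readability) of the two insertion macros below: each binary-searches the
   sorted column indices of a row of the diagonal block A or the off-diagonal block B, updates the value in
   place when the column already exists, silently skips the entry when new nonzeros are disallowed
   (nonew == 1) or the value is an ignorable zero, errors when nonew == -1, and otherwise reallocates via
   MatSeqXAIJReallocateAIJ() and shifts the later entries of the row to make room for the new nonzero. */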
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure whether LogFlops will slow down the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
613                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
619         }
620       }
621     } else {
622       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
627         } else {
628           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         }
630       }
631     }
632   }
633   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
634   PetscCall(MatSeqAIJRestoreArray(B, &ba));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
642 */
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
682     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
729   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
730   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
731 
732   PetscFunctionBegin;
733   for (i = 0; i < m; i++) {
734     if (idxm[i] < 0) continue; /* negative row */
735     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
736     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
737     row = idxm[i] - rstart;
738     for (j = 0; j < n; j++) {
739       if (idxn[j] < 0) continue; /* negative column */
740       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
741       if (idxn[j] >= cstart && idxn[j] < cend) {
742         col = idxn[j] - cstart;
743         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
744       } else {
745         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
746 #if defined(PETSC_USE_CTABLE)
747         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
748         col--;
749 #else
750         col = aij->colmap[idxn[j]] - 1;
751 #endif
752         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
753         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
754       }
755     }
756   }
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
775 {
776   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
777   PetscMPIInt  n;
778   PetscInt     i, j, rstart, ncols, flg;
779   PetscInt    *row, *col;
780   PetscBool    other_disassembled;
781   PetscScalar *val;
782 
783   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
784 
785   PetscFunctionBegin;
786   if (!aij->donotstash && !mat->nooffprocentries) {
787     while (1) {
788       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
789       if (!flg) break;
790 
791       for (i = 0; i < n;) {
792         /* Now identify the consecutive vals belonging to the same row */
793         for (j = i, rstart = row[j]; j < n; j++) {
794           if (row[j] != rstart) break;
795         }
796         if (j < n) ncols = j - i;
797         else ncols = n - i;
798         /* Now assemble all these values with a single function call */
799         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
800         i = j;
801       }
802     }
803     PetscCall(MatStashScatterEnd_Private(&mat->stash));
804   }
805 #if defined(PETSC_HAVE_DEVICE)
806   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
807   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
808   if (mat->boundtocpu) {
809     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
810     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
811   }
812 #endif
813   PetscCall(MatAssemblyBegin(aij->A, mode));
814   PetscCall(MatAssemblyEnd(aij->A, mode));
815 
816   /* determine if any processor has disassembled; if so, we must
817      also disassemble ourselves so that we may reassemble. */
818   /*
819      if nonzero structure of submatrix B cannot change then we know that
820      no processor disassembled, and thus we can skip this step
821   */
822   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
823     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
824     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
825       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
829   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
830 #if defined(PETSC_HAVE_DEVICE)
831   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
832 #endif
833   PetscCall(MatAssemblyBegin(aij->B, mode));
834   PetscCall(MatAssemblyEnd(aij->B, mode));
835 
836   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
837 
838   aij->rowvalues = NULL;
839 
840   PetscCall(VecDestroy(&aij->diag));
841 
842   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
846   }
847 #if defined(PETSC_HAVE_DEVICE)
848   mat->offloadmask = PETSC_OFFLOAD_BOTH;
849 #endif
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
864 {
865   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
866   PetscInt   *lrows;
867   PetscInt    r, len;
868   PetscBool   cong;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
873   PetscCall(MatHasCongruentLayouts(A, &cong));
874   /* fix right-hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
880     PetscCall(VecGetArrayRead(x, &xx));
881     PetscCall(VecGetArray(b, &bb));
882     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
883     PetscCall(VecRestoreArrayRead(x, &xx));
884     PetscCall(VecRestoreArray(b, &bb));
885   }
886 
887   if (diag != 0.0 && cong) {
888     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
889     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
890   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
891     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
892     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
893     PetscInt    nnwA, nnwB;
894     PetscBool   nnzA, nnzB;
895 
896     nnwA = aijA->nonew;
897     nnwB = aijB->nonew;
898     nnzA = aijA->keepnonzeropattern;
899     nnzB = aijB->keepnonzeropattern;
900     if (!nnzA) {
901       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
902       aijA->nonew = 0;
903     }
904     if (!nnzB) {
905       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
906       aijB->nonew = 0;
907     }
908     /* Must zero here before the next loop */
909     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
910     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
911     for (r = 0; r < len; ++r) {
912       const PetscInt row = lrows[r] + A->rmap->rstart;
913       if (row >= A->cmap->N) continue;
914       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
915     }
916     aijA->nonew = nnwA;
917     aijB->nonew = nnwB;
918   } else {
919     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
920     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
921   }
922   PetscCall(PetscFree(lrows));
923   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
928     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
929     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
935 {
936   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
937   PetscInt           n = A->rmap->n;
938   PetscInt           i, j, r, m, len = 0;
939   PetscInt          *lrows, *owners = A->rmap->range;
940   PetscMPIInt        p = 0;
941   PetscSFNode       *rrows;
942   PetscSF            sf;
943   const PetscScalar *xx;
944   PetscScalar       *bb, *mask, *aij_a;
945   Vec                xmask, lmask;
946   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
947   const PetscInt    *aj, *ii, *ridx;
948   PetscScalar       *aa;
949 
950   PetscFunctionBegin;
951   /* Create SF where leaves are input rows and roots are owned rows */
952   PetscCall(PetscMalloc1(n, &lrows));
953   for (r = 0; r < n; ++r) lrows[r] = -1;
954   PetscCall(PetscMalloc1(N, &rrows));
955   for (r = 0; r < N; ++r) {
956     const PetscInt idx = rows[r];
957     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
958     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
959       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
960     }
961     rrows[r].rank  = p;
962     rrows[r].index = rows[r] - owners[p];
963   }
964   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
965   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
966   /* Collect flags for rows to be zeroed */
967   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
969   PetscCall(PetscSFDestroy(&sf));
970   /* Compress and put in row numbers */
971   for (r = 0; r < n; ++r)
972     if (lrows[r] >= 0) lrows[len++] = r;
973   /* zero diagonal part of matrix */
974   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
975   /* handle off-diagonal part of matrix */
976   PetscCall(MatCreateVecs(A, &xmask, NULL));
977   PetscCall(VecDuplicate(l->lvec, &lmask));
978   PetscCall(VecGetArray(xmask, &bb));
979   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
980   PetscCall(VecRestoreArray(xmask, &bb));
981   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
983   PetscCall(VecDestroy(&xmask));
984   if (x && b) { /* this code is buggy when the row and column layouts don't match */
985     PetscBool cong;
986 
987     PetscCall(MatHasCongruentLayouts(A, &cong));
988     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
989     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
991     PetscCall(VecGetArrayRead(l->lvec, &xx));
992     PetscCall(VecGetArray(b, &bb));
993   }
994   PetscCall(VecGetArray(lmask, &mask));
995   /* remove zeroed rows of off-diagonal matrix */
996   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
997   ii = aij->i;
998   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
999   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1000   if (aij->compressedrow.use) {
1001     m    = aij->compressedrow.nrows;
1002     ii   = aij->compressedrow.i;
1003     ridx = aij->compressedrow.rindex;
1004     for (i = 0; i < m; i++) {
1005       n  = ii[i + 1] - ii[i];
1006       aj = aij->j + ii[i];
1007       aa = aij_a + ii[i];
1008 
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[*ridx] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017       ridx++;
1018     }
1019   } else { /* do not use compressed row format */
1020     m = l->B->rmap->n;
1021     for (i = 0; i < m; i++) {
1022       n  = ii[i + 1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij_a + ii[i];
1025       for (j = 0; j < n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           if (b) bb[i] -= *aa * xx[*aj];
1028           *aa = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033     }
1034   }
1035   if (x && b) {
1036     PetscCall(VecRestoreArray(b, &bb));
1037     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1038   }
1039   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1040   PetscCall(VecRestoreArray(lmask, &mask));
1041   PetscCall(VecDestroy(&lmask));
1042   PetscCall(PetscFree(lrows));
1043 
1044   /* only change matrix nonzero state if pattern was allowed to be changed */
1045   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1046     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1047     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1048   }
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1106 {
1107   MPI_Comm    comm;
1108   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1109   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1110   IS          Me, Notme;
1111   PetscInt    M, N, first, last, *notme, i;
1112   PetscBool   lf;
1113   PetscMPIInt size;
1114 
1115   PetscFunctionBegin;
1116   /* Easy test: symmetric diagonal block */
1117   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1118   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1119   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1120   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1121   PetscCallMPI(MPI_Comm_size(comm, &size));
1122   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1123 
1124   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1125   PetscCall(MatGetSize(Amat, &M, &N));
1126   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1127   PetscCall(PetscMalloc1(N - last + first, &notme));
1128   for (i = 0; i < first; i++) notme[i] = i;
1129   for (i = last; i < M; i++) notme[i - last + first] = i;
1130   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1131   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1132   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1133   Aoff = Aoffs[0];
1134   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1135   Boff = Boffs[0];
1136   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1137   PetscCall(MatDestroyMatrices(1, &Aoffs));
1138   PetscCall(MatDestroyMatrices(1, &Boffs));
1139   PetscCall(ISDestroy(&Me));
1140   PetscCall(ISDestroy(&Notme));
1141   PetscCall(PetscFree(notme));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 #include <petscdraw.h>
1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1265   PetscMPIInt       rank = aij->rank, size = aij->size;
1266   PetscBool         isdraw, iascii, isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1274   if (iascii) {
1275     PetscCall(PetscViewerGetFormat(viewer, &format));
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1278       PetscCall(PetscMalloc1(size, &nz));
1279       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1280       for (i = 0; i < size; i++) {
1281         nmax = PetscMax(nmax, nz[i]);
1282         nmin = PetscMin(nmin, nz[i]);
1283         navg += nz[i];
1284       }
1285       PetscCall(PetscFree(nz));
1286       navg = navg / size;
1287       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1288       PetscFunctionReturn(PETSC_SUCCESS);
1289     }
1290     PetscCall(PetscViewerGetFormat(viewer, &format));
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes = NULL;
1294 
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1296       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1297       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1298       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1299       if (!inodes) {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      info.memory));
1302       } else {
1303         PetscCall(
1304           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1305       }
1306       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1307       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1308       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(PetscViewerFlush(viewer));
1311       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1312       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1313       PetscCall(VecScatterView(aij->Mvctx, viewer));
1314       PetscFunctionReturn(PETSC_SUCCESS);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount, inodelimit, *inodes;
1317       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1318       if (inodes) {
1319         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1320       } else {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1322       }
1323       PetscFunctionReturn(PETSC_SUCCESS);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1330       PetscCall(MatView(aij->A, viewer));
1331     } else {
1332       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1333     }
1334     PetscFunctionReturn(PETSC_SUCCESS);
1335   } else if (iascii && size == 1) {
1336     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1337     PetscCall(MatView(aij->A, viewer));
1338     PetscFunctionReturn(PETSC_SUCCESS);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1343     PetscCall(PetscDrawIsNull(draw, &isnull));
1344     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow, iscol;
1350 
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1352     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1353     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1354     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1355     /*  The commented code uses MatCreateSubMatrices instead */
1356     /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1361     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1362     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1363     if (rank == 0) {
1364        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     PetscCall(MatDestroySubMatrices(1,&AA));
1369 */
1370     PetscCall(ISDestroy(&iscol));
1371     PetscCall(ISDestroy(&isrow));
1372     /*
1373        Every process must participate in drawing the matrix since the graphics waits are
1374        synchronized across all processes that share the PetscDraw object
1375     */
1376     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     if (rank == 0) {
1378       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1379       PetscCall(MatView_SeqAIJ(Av, sviewer));
1380     }
1381     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1382     PetscCall(MatDestroy(&A));
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1401 {
1402   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1403   Vec         bb1 = NULL;
1404   PetscBool   hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1409     PetscFunctionReturn(PETSC_SUCCESS);
1410   }
1411 
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1413 
1414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419 
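    /* Each remaining iteration refreshes the ghost values of xx, forms bb1 = bb - B*xx to account for
       the off-process coupling, and then applies a local symmetric SOR sweep on the diagonal block */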
1420     while (its--) {
1421       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1423 
1424       /* update rhs: bb1 = bb - B*x */
1425       PetscCall(VecScale(mat->lvec, -1.0));
1426       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1427 
1428       /* local sweep */
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1430     }
1431   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1432     if (flag & SOR_ZERO_INITIAL_GUESS) {
1433       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1434       its--;
1435     }
1436     while (its--) {
1437       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       PetscCall(VecScale(mat->lvec, -1.0));
1442       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1443 
1444       /* local sweep */
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1446     }
1447   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1450       its--;
1451     }
1452     while (its--) {
1453       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       PetscCall(VecScale(mat->lvec, -1.0));
1458       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1459 
1460       /* local sweep */
1461       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1462     }
1463   } else if (flag & SOR_EISENSTAT) {
1464     Vec xx1;
1465 
1466     PetscCall(VecDuplicate(bb, &xx1));
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1468 
1469     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1471     if (!mat->diag) {
1472       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1473       PetscCall(MatGetDiagonal(matin, mat->diag));
1474     }
1475     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1476     if (hasop) {
1477       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1478     } else {
1479       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1480     }
1481     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1482 
1483     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1484 
1485     /* local sweep */
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1487     PetscCall(VecAXPY(xx, 1.0, xx1));
1488     PetscCall(VecDestroy(&xx1));
1489   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1490 
1491   PetscCall(VecDestroy(&bb1));
1492 
1493   matin->factorerrortype = mat->A->factorerrortype;
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1498 {
1499   Mat             aA, aB, Aperm;
1500   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1501   PetscScalar    *aa, *ba;
1502   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1503   PetscSF         rowsf, sf;
1504   IS              parcolp = NULL;
1505   PetscBool       done;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetLocalSize(A, &m, &n));
1509   PetscCall(ISGetIndices(rowp, &rwant));
1510   PetscCall(ISGetIndices(colp, &cwant));
1511   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1512 
1513   /* Invert row permutation to find out where my rows should go */
1514   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1515   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1516   PetscCall(PetscSFSetFromOptions(rowsf));
1517   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1518   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1520 
1521   /* Invert column permutation to find out where my columns should go */
1522   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1523   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1524   PetscCall(PetscSFSetFromOptions(sf));
1525   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1526   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1528   PetscCall(PetscSFDestroy(&sf));
1529 
1530   PetscCall(ISRestoreIndices(rowp, &rwant));
1531   PetscCall(ISRestoreIndices(colp, &cwant));
1532   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1533 
1534   /* Find out where my gcols should go */
1535   PetscCall(MatGetSize(aB, NULL, &ng));
1536   PetscCall(PetscMalloc1(ng, &gcdest));
1537   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1538   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1539   PetscCall(PetscSFSetFromOptions(sf));
1540   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1545   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1546   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1547   for (i = 0; i < m; i++) {
1548     PetscInt    row = rdest[i];
1549     PetscMPIInt rowner;
1550     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       PetscInt    col = cdest[aj[j]];
1553       PetscMPIInt cowner;
1554       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1555       if (rowner == cowner) dnnz[i]++;
1556       else onnz[i]++;
1557     }
1558     for (j = bi[i]; j < bi[i + 1]; j++) {
1559       PetscInt    col = gcdest[bj[j]];
1560       PetscMPIInt cowner;
1561       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1562       if (rowner == cowner) dnnz[i]++;
1563       else onnz[i]++;
1564     }
1565   }
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&rowsf));
1571 
1572   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1573   PetscCall(MatSeqAIJGetArray(aA, &aa));
1574   PetscCall(MatSeqAIJGetArray(aB, &ba));
1575   for (i = 0; i < m; i++) {
1576     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1577     PetscInt  j0, rowlen;
1578     rowlen = ai[i + 1] - ai[i];
1579     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than the number of rows m, so insert values in batches of at most m */
1580       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1581       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1582     }
1583     rowlen = bi[i + 1] - bi[i];
1584     for (j0 = j = 0; j < rowlen; j0 = j) {
1585       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1586       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1587     }
1588   }
1589   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1592   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1593   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1594   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1595   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1596   PetscCall(PetscFree3(work, rdest, cdest));
1597   PetscCall(PetscFree(gcdest));
1598   if (parcolp) PetscCall(ISDestroy(&colp));
1599   *B = Aperm;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1666 {
1667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A, 1);
1680     PetscCall(MatSetOption(a->A, op, flg));
1681     PetscCall(MatSetOption(a->B, op, flg));
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A, 1);
1685     a->roworiented = flg;
1686 
1687     PetscCall(MatSetOption(a->A, op, flg));
1688     PetscCall(MatSetOption(a->B, op, flg));
1689     break;
1690   case MAT_IGNORE_OFF_PROC_ENTRIES:
1691     a->donotstash = flg;
1692     break;
1693   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1694   case MAT_SPD:
1695   case MAT_SYMMETRIC:
1696   case MAT_STRUCTURALLY_SYMMETRIC:
1697   case MAT_HERMITIAN:
1698   case MAT_SYMMETRY_ETERNAL:
1699   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1700   case MAT_SPD_ETERNAL:
1701     /* if the diagonal matrix is square it inherits some of the properties above */
1702     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1703     break;
1704   case MAT_SUBMAT_SINGLEIS:
1705     A->submat_singleis = flg;
1706     break;
1707   default:
1708     break;
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1714 {
1715   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1716   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1717   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1718   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1719   PetscInt    *cmap, *idx_p;
1720 
1721   PetscFunctionBegin;
1722   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1723   mat->getrowactive = PETSC_TRUE;
1724 
1725   if (!mat->rowvalues && (idx || v)) {
1726     /*
1727         allocate enough space to hold information from the longest row.
1728     */
1729     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1730     PetscInt    max = 1, tmp;
1731     for (i = 0; i < matin->rmap->n; i++) {
1732       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1733       if (max < tmp) max = tmp;
1734     }
1735     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1736   }
1737 
1738   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1739   lrow = row - rstart;
1740 
1741   pvA = &vworkA;
1742   pcA = &cworkA;
1743   pvB = &vworkB;
1744   pcB = &cworkB;
1745   if (!v) {
1746     pvA = NULL;
1747     pvB = NULL;
1748   }
1749   if (!idx) {
1750     pcA = NULL;
1751     if (!v) pcB = NULL;
1752   }
1753   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1754   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1755   nztot = nzA + nzB;
1756 
1757   cmap = mat->garray;
1758   if (v || idx) {
1759     if (nztot) {
1760       /* Sort by increasing column numbers, assuming A and B already sorted */
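      /* imark counts how many B entries have global column < cstart; the merged row is those entries,
         then the diagonal-block entries shifted by cstart, then the remaining B entries */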
1761       PetscInt imark = -1;
1762       if (v) {
1763         *v = v_p = mat->rowvalues;
1764         for (i = 0; i < nzB; i++) {
1765           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766           else break;
1767         }
1768         imark = i;
1769         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1770         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1771       }
1772       if (idx) {
1773         *idx = idx_p = mat->rowindices;
1774         if (imark > -1) {
1775           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1776         } else {
1777           for (i = 0; i < nzB; i++) {
1778             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1779             else break;
1780           }
1781           imark = i;
1782         }
1783         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1784         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1785       }
1786     } else {
1787       if (idx) *idx = NULL;
1788       if (v) *v = NULL;
1789     }
1790   }
1791   *nz = nztot;
1792   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1793   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1808 {
1809   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1810   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1811   PetscInt         i, j, cstart = mat->cmap->rstart;
1812   PetscReal        sum = 0.0;
1813   const MatScalar *v, *amata, *bmata;
1814 
1815   PetscFunctionBegin;
1816   if (aij->size == 1) {
1817     PetscCall(MatNorm(aij->A, type, norm));
1818   } else {
1819     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1820     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1821     if (type == NORM_FROBENIUS) {
1822       v = amata;
1823       for (i = 0; i < amat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v) * (*v));
1825         v++;
1826       }
1827       v = bmata;
1828       for (i = 0; i < bmat->nz; i++) {
1829         sum += PetscRealPart(PetscConj(*v) * (*v));
1830         v++;
1831       }
1832       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1833       *norm = PetscSqrtReal(*norm);
1834       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1835     } else if (type == NORM_1) { /* max column norm */
1836       PetscReal *tmp;
1837       PetscInt  *jj, *garray = aij->garray;
1838       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1839       *norm = 0.0;
1840       v     = amata;
1841       jj    = amat->j;
1842       for (j = 0; j < amat->nz; j++) {
1843         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1844         v++;
1845       }
1846       v  = bmata;
1847       jj = bmat->j;
1848       for (j = 0; j < bmat->nz; j++) {
1849         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1850         v++;
1851       }
1852       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1853       for (j = 0; j < mat->cmap->N; j++) {
1854         if (tmp[j] > *norm) *norm = tmp[j];
1855       }
1856       PetscCall(PetscFree(tmp));
1857       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1858     } else if (type == NORM_INFINITY) { /* max row norm */
1859       PetscReal ntemp = 0.0;
1860       for (j = 0; j < aij->A->rmap->n; j++) {
1861         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1862         sum = 0.0;
1863         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1864           sum += PetscAbsScalar(*v);
1865           v++;
1866         }
1867         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1868         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1869           sum += PetscAbsScalar(*v);
1870           v++;
1871         }
1872         if (sum > ntemp) ntemp = sum;
1873       }
1874       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1875       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1876     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1878     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1879   }
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1884 {
1885   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1886   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1887   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1888   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1889   Mat              B, A_diag, *B_diag;
1890   const MatScalar *pbv, *bv;
1891 
1892   PetscFunctionBegin;
1893   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1894   ma = A->rmap->n;
1895   na = A->cmap->n;
1896   mb = a->B->rmap->n;
1897   nb = a->B->cmap->n;
1898   ai = Aloc->i;
1899   aj = Aloc->j;
1900   bi = Bloc->i;
1901   bj = Bloc->j;
1902   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1903     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1904     PetscSFNode         *oloc;
1905     PETSC_UNUSED PetscSF sf;
1906 
1907     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1908     /* compute d_nnz for preallocation */
1909     PetscCall(PetscArrayzero(d_nnz, na));
1910     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1911     /* compute local off-diagonal contributions */
1912     PetscCall(PetscArrayzero(g_nnz, nb));
1913     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1914     /* map those to global */
1915     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1916     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1917     PetscCall(PetscSFSetFromOptions(sf));
1918     PetscCall(PetscArrayzero(o_nnz, na));
1919     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1921     PetscCall(PetscSFDestroy(&sf));
1922 
1923     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1924     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1925     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1926     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1927     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1928     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1929   } else {
1930     B = *matout;
1931     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1932   }
1933 
1934   b           = (Mat_MPIAIJ *)B->data;
1935   A_diag      = a->A;
1936   B_diag      = &b->A;
1937   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1938   A_diag_ncol = A_diag->cmap->N;
1939   B_diag_ilen = sub_B_diag->ilen;
1940   B_diag_i    = sub_B_diag->i;
1941 
1942   /* Set ilen for diagonal of B */
1943   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1944 
1945   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1946   very quickly (without using MatSetValues) because all writes are local. */
1947   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1948   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1949 
1950   /* copy over the B part */
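  /* Row i of a->B (whose global columns are stored in cols) becomes column A->rmap->rstart + i of the
     transpose, so MatSetValues is called with ncol row indices and a single column index per local row */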
1951   PetscCall(PetscMalloc1(bi[mb], &cols));
1952   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1953   pbv = bv;
1954   row = A->rmap->rstart;
1955   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1956   cols_tmp = cols;
1957   for (i = 0; i < mb; i++) {
1958     ncol = bi[i + 1] - bi[i];
1959     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1960     row++;
1961     if (pbv) pbv += ncol;
1962     if (cols_tmp) cols_tmp += ncol;
1963   }
1964   PetscCall(PetscFree(cols));
1965   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1966 
1967   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1968   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1970     *matout = B;
1971   } else {
1972     PetscCall(MatHeaderMerge(A, &B));
1973   }
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1978 {
1979   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1980   Mat         a = aij->A, b = aij->B;
1981   PetscInt    s1, s2, s3;
1982 
1983   PetscFunctionBegin;
1984   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1985   if (rr) {
1986     PetscCall(VecGetLocalSize(rr, &s1));
1987     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1990   }
1991   if (ll) {
1992     PetscCall(VecGetLocalSize(ll, &s1));
1993     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1994     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1995   }
1996   /* scale the diagonal block */
1997   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2003   }
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(MatSetUnfactored(a->A));
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2017 {
2018   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2019   Mat         a, b, c, d;
2020   PetscBool   flg;
2021 
2022   PetscFunctionBegin;
2023   a = matA->A;
2024   b = matA->B;
2025   c = matB->A;
2026   d = matB->B;
2027 
2028   PetscCall(MatEqual(a, c, &flg));
2029   if (flg) PetscCall(MatEqual(b, d, &flg));
2030   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2038 
2039   PetscFunctionBegin;
2040   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2041   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2042     /* because of the column compression in the off-processor part of the matrix a->B,
2043        the number of columns in a->B and b->B may be different, hence we cannot call
2044        the MatCopy() directly on the two parts. If need be, we can provide a more
2045        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2046        then copying the submatrices */
2047     PetscCall(MatCopy_Basic(A, B, str));
2048   } else {
2049     PetscCall(MatCopy(a->A, b->A, str));
2050     PetscCall(MatCopy(a->B, b->B, str));
2051   }
2052   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 /*
2057    Computes the number of nonzeros per row needed for preallocation when X and Y
2058    have different nonzero structure.
2059 */
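/*
   For illustration (an assumed example): if row i of X has global columns {1,5} and row i of Y has
   global columns {3,5,9}, the merge below visits 1, 3, 5, 9 once each, so nnz[i] = 4, the size of the
   union of the two rows.
*/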
2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2061 {
2062   PetscInt i, j, k, nzx, nzy;
2063 
2064   PetscFunctionBegin;
2065   /* Set the number of nonzeros in the new matrix */
2066   for (i = 0; i < m; i++) {
2067     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2068     nzx    = xi[i + 1] - xi[i];
2069     nzy    = yi[i + 1] - yi[i];
2070     nnz[i] = 0;
2071     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2072       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2073       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2074       nnz[i]++;
2075     }
2076     for (; k < nzy; k++) nnz[i]++;
2077   }
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2083 {
2084   PetscInt    m = Y->rmap->N;
2085   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2086   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2094 {
2095   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2096 
2097   PetscFunctionBegin;
2098   if (str == SAME_NONZERO_PATTERN) {
2099     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2100     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2101   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2102     PetscCall(MatAXPY_Basic(Y, a, X, str));
2103   } else {
2104     Mat       B;
2105     PetscInt *nnz_d, *nnz_o;
2106 
2107     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2108     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2109     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2110     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2111     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2112     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2113     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2114     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2115     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2116     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2117     PetscCall(MatHeaderMerge(Y, &B));
2118     PetscCall(PetscFree(nnz_d));
2119     PetscCall(PetscFree(nnz_o));
2120   }
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128   PetscFunctionBegin;
2129   if (PetscDefined(USE_COMPLEX)) {
2130     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2131 
2132     PetscCall(MatConjugate_SeqAIJ(aij->A));
2133     PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   }
2135   PetscFunctionReturn(PETSC_SUCCESS);
2136 }
2137 
2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2141 
2142   PetscFunctionBegin;
2143   PetscCall(MatRealPart(a->A));
2144   PetscCall(MatRealPart(a->B));
2145   PetscFunctionReturn(PETSC_SUCCESS);
2146 }
2147 
2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2151 
2152   PetscFunctionBegin;
2153   PetscCall(MatImaginaryPart(a->A));
2154   PetscCall(MatImaginaryPart(a->B));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2159 {
2160   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2161   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2162   PetscScalar       *vv;
2163   Vec                vB, vA;
2164   const PetscScalar *va, *vb;
2165 
2166   PetscFunctionBegin;
2167   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2168   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2169 
2170   PetscCall(VecGetArrayRead(vA, &va));
2171   if (idx) {
2172     for (i = 0; i < m; i++) {
2173       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2174     }
2175   }
2176 
2177   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2178   PetscCall(PetscMalloc1(m, &idxb));
2179   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2180 
2181   PetscCall(VecGetArrayWrite(v, &vv));
2182   PetscCall(VecGetArrayRead(vB, &vb));
2183   for (i = 0; i < m; i++) {
2184     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2185       vv[i] = vb[i];
2186       if (idx) idx[i] = a->garray[idxb[i]];
2187     } else {
2188       vv[i] = va[i];
2189       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2190     }
2191   }
2192   PetscCall(VecRestoreArrayWrite(v, &vv));
2193   PetscCall(VecRestoreArrayRead(vA, &va));
2194   PetscCall(VecRestoreArrayRead(vB, &vb));
2195   PetscCall(PetscFree(idxb));
2196   PetscCall(VecDestroy(&vA));
2197   PetscCall(VecDestroy(&vB));
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2202 {
2203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2219 {
2220   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2221   PetscInt           m = A->rmap->n, n = A->cmap->n;
2222   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2223   PetscInt          *cmap = mat->garray;
2224   PetscInt          *diagIdx, *offdiagIdx;
2225   Vec                diagV, offdiagV;
2226   PetscScalar       *a, *diagA, *offdiagA;
2227   const PetscScalar *ba, *bav;
2228   PetscInt           r, j, col, ncols, *bi, *bj;
2229   Mat                B = mat->B;
2230   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2231 
2232   PetscFunctionBegin;
2233   /* When a process holds entire A and other processes have no entry */
2234   /* When one process holds the entire A and the other processes have no entries */
2235     PetscCall(VecGetArrayWrite(v, &diagA));
2236     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2237     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2238     PetscCall(VecDestroy(&diagV));
2239     PetscCall(VecRestoreArrayWrite(v, &diagA));
2240     PetscFunctionReturn(PETSC_SUCCESS);
2241   } else if (n == 0) {
2242     if (m) {
2243       PetscCall(VecGetArrayWrite(v, &a));
2244       for (r = 0; r < m; r++) {
2245         a[r] = 0.0;
2246         if (idx) idx[r] = -1;
2247       }
2248       PetscCall(VecRestoreArrayWrite(v, &a));
2249     }
2250     PetscFunctionReturn(PETSC_SUCCESS);
2251   }
2252 
2253   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2255   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2256   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2257 
2258   /* Get offdiagIdx[] for implicit 0.0 */
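  /* B stores only the off-process columns that actually occur (the compressed cmap[]); every other
     global column outside the local diagonal block is an implicit 0.0, and when 0.0 turns out to be
     the extremum for a row we must report the index of one such column */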
2259   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2260   ba = bav;
2261   bi = b->i;
2262   bj = b->j;
2263   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2264   for (r = 0; r < m; r++) {
2265     ncols = bi[r + 1] - bi[r];
2266     if (ncols == A->cmap->N - n) { /* Brow is dense */
2267       offdiagA[r]   = *ba;
2268       offdiagIdx[r] = cmap[0];
2269     } else { /* Brow is sparse so we already KNOW the minimum in absolute value is 0.0 (an implicit zero) */
2270       offdiagA[r] = 0.0;
2271 
2272       /* Find first hole in the cmap */
2273       for (j = 0; j < ncols; j++) {
2274         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2275         if (col > j && j < cstart) {
2276           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2277           break;
2278         } else if (col > j + n && j >= cstart) {
2279           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2280           break;
2281         }
2282       }
2283       if (j == ncols && ncols < A->cmap->N - n) {
2284         /* a hole is outside compressed Bcols */
2285         if (ncols == 0) {
2286           if (cstart) {
2287             offdiagIdx[r] = 0;
2288           } else offdiagIdx[r] = cend;
2289         } else { /* ncols > 0 */
2290           offdiagIdx[r] = cmap[ncols - 1] + 1;
2291           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2292         }
2293       }
2294     }
2295 
2296     for (j = 0; j < ncols; j++) {
2297       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2298         offdiagA[r]   = *ba;
2299         offdiagIdx[r] = cmap[*bj];
2300       }
2301       ba++;
2302       bj++;
2303     }
2304   }
2305 
2306   PetscCall(VecGetArrayWrite(v, &a));
2307   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2308   for (r = 0; r < m; ++r) {
2309     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2310       a[r] = diagA[r];
2311       if (idx) idx[r] = cstart + diagIdx[r];
2312     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2313       a[r] = diagA[r];
2314       if (idx) {
2315         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2316           idx[r] = cstart + diagIdx[r];
2317         } else idx[r] = offdiagIdx[r];
2318       }
2319     } else {
2320       a[r] = offdiagA[r];
2321       if (idx) idx[r] = offdiagIdx[r];
2322     }
2323   }
2324   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2325   PetscCall(VecRestoreArrayWrite(v, &a));
2326   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2327   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2328   PetscCall(VecDestroy(&diagV));
2329   PetscCall(VecDestroy(&offdiagV));
2330   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2331   PetscFunctionReturn(PETSC_SUCCESS);
2332 }
2333 
2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2335 {
2336   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2337   PetscInt           m = A->rmap->n, n = A->cmap->n;
2338   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2339   PetscInt          *cmap = mat->garray;
2340   PetscInt          *diagIdx, *offdiagIdx;
2341   Vec                diagV, offdiagV;
2342   PetscScalar       *a, *diagA, *offdiagA;
2343   const PetscScalar *ba, *bav;
2344   PetscInt           r, j, col, ncols, *bi, *bj;
2345   Mat                B = mat->B;
2346   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2347 
2348   PetscFunctionBegin;
2349   /* When one process holds the entire A and the other processes have no entries */
2350   if (A->cmap->N == n) {
2351     PetscCall(VecGetArrayWrite(v, &diagA));
2352     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2353     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2354     PetscCall(VecDestroy(&diagV));
2355     PetscCall(VecRestoreArrayWrite(v, &diagA));
2356     PetscFunctionReturn(PETSC_SUCCESS);
2357   } else if (n == 0) {
2358     if (m) {
2359       PetscCall(VecGetArrayWrite(v, &a));
2360       for (r = 0; r < m; r++) {
2361         a[r] = PETSC_MAX_REAL;
2362         if (idx) idx[r] = -1;
2363       }
2364       PetscCall(VecRestoreArrayWrite(v, &a));
2365     }
2366     PetscFunctionReturn(PETSC_SUCCESS);
2367   }
2368 
2369   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2371   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2372   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2373 
2374   /* Get offdiagIdx[] for implicit 0.0 */
2375   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2376   ba = bav;
2377   bi = b->i;
2378   bj = b->j;
2379   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2380   for (r = 0; r < m; r++) {
2381     ncols = bi[r + 1] - bi[r];
2382     if (ncols == A->cmap->N - n) { /* Brow is dense */
2383       offdiagA[r]   = *ba;
2384       offdiagIdx[r] = cmap[0];
2385     } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower (an implicit zero) */
2386       offdiagA[r] = 0.0;
2387 
2388       /* Find first hole in the cmap */
2389       for (j = 0; j < ncols; j++) {
2390         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2391         if (col > j && j < cstart) {
2392           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2393           break;
2394         } else if (col > j + n && j >= cstart) {
2395           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2396           break;
2397         }
2398       }
2399       if (j == ncols && ncols < A->cmap->N - n) {
2400         /* a hole is outside compressed Bcols */
2401         if (ncols == 0) {
2402           if (cstart) {
2403             offdiagIdx[r] = 0;
2404           } else offdiagIdx[r] = cend;
2405         } else { /* ncols > 0 */
2406           offdiagIdx[r] = cmap[ncols - 1] + 1;
2407           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2408         }
2409       }
2410     }
2411 
2412     for (j = 0; j < ncols; j++) {
2413       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2414         offdiagA[r]   = *ba;
2415         offdiagIdx[r] = cmap[*bj];
2416       }
2417       ba++;
2418       bj++;
2419     }
2420   }
2421 
2422   PetscCall(VecGetArrayWrite(v, &a));
2423   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2424   for (r = 0; r < m; ++r) {
2425     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2426       a[r] = diagA[r];
2427       if (idx) idx[r] = cstart + diagIdx[r];
2428     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2429       a[r] = diagA[r];
2430       if (idx) {
2431         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2432           idx[r] = cstart + diagIdx[r];
2433         } else idx[r] = offdiagIdx[r];
2434       }
2435     } else {
2436       a[r] = offdiagA[r];
2437       if (idx) idx[r] = offdiagIdx[r];
2438     }
2439   }
2440   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2441   PetscCall(VecRestoreArrayWrite(v, &a));
2442   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2443   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2444   PetscCall(VecDestroy(&diagV));
2445   PetscCall(VecDestroy(&offdiagV));
2446   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2447   PetscFunctionReturn(PETSC_SUCCESS);
2448 }
2449 
2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2451 {
2452   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2453   PetscInt           m = A->rmap->n, n = A->cmap->n;
2454   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2455   PetscInt          *cmap = mat->garray;
2456   PetscInt          *diagIdx, *offdiagIdx;
2457   Vec                diagV, offdiagV;
2458   PetscScalar       *a, *diagA, *offdiagA;
2459   const PetscScalar *ba, *bav;
2460   PetscInt           r, j, col, ncols, *bi, *bj;
2461   Mat                B = mat->B;
2462   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2463 
2464   PetscFunctionBegin;
2465   /* When one process holds the entire A and the other processes have no entries */
2466   if (A->cmap->N == n) {
2467     PetscCall(VecGetArrayWrite(v, &diagA));
2468     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2469     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2470     PetscCall(VecDestroy(&diagV));
2471     PetscCall(VecRestoreArrayWrite(v, &diagA));
2472     PetscFunctionReturn(PETSC_SUCCESS);
2473   } else if (n == 0) {
2474     if (m) {
2475       PetscCall(VecGetArrayWrite(v, &a));
2476       for (r = 0; r < m; r++) {
2477         a[r] = PETSC_MIN_REAL;
2478         if (idx) idx[r] = -1;
2479       }
2480       PetscCall(VecRestoreArrayWrite(v, &a));
2481     }
2482     PetscFunctionReturn(PETSC_SUCCESS);
2483   }
2484 
2485   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2487   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2488   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2492   ba = bav;
2493   bi = b->i;
2494   bj = b->j;
2495   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2496   for (r = 0; r < m; r++) {
2497     ncols = bi[r + 1] - bi[r];
2498     if (ncols == A->cmap->N - n) { /* Brow is dense */
2499       offdiagA[r]   = *ba;
2500       offdiagIdx[r] = cmap[0];
2501     } else { /* Brow is sparse so we already KNOW the maximum is 0.0 or higher (an implicit zero) */
2502       offdiagA[r] = 0.0;
2503 
2504       /* Find first hole in the cmap */
2505       for (j = 0; j < ncols; j++) {
2506         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2507         if (col > j && j < cstart) {
2508           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2509           break;
2510         } else if (col > j + n && j >= cstart) {
2511           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2512           break;
2513         }
2514       }
2515       if (j == ncols && ncols < A->cmap->N - n) {
2516         /* a hole is outside compressed Bcols */
2517         if (ncols == 0) {
2518           if (cstart) {
2519             offdiagIdx[r] = 0;
2520           } else offdiagIdx[r] = cend;
2521         } else { /* ncols > 0 */
2522           offdiagIdx[r] = cmap[ncols - 1] + 1;
2523           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2524         }
2525       }
2526     }
2527 
2528     for (j = 0; j < ncols; j++) {
2529       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2530         offdiagA[r]   = *ba;
2531         offdiagIdx[r] = cmap[*bj];
2532       }
2533       ba++;
2534       bj++;
2535     }
2536   }
2537 
2538   PetscCall(VecGetArrayWrite(v, &a));
2539   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2540   for (r = 0; r < m; ++r) {
2541     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2542       a[r] = diagA[r];
2543       if (idx) idx[r] = cstart + diagIdx[r];
2544     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2545       a[r] = diagA[r];
2546       if (idx) {
2547         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2548           idx[r] = cstart + diagIdx[r];
2549         } else idx[r] = offdiagIdx[r];
2550       }
2551     } else {
2552       a[r] = offdiagA[r];
2553       if (idx) idx[r] = offdiagIdx[r];
2554     }
2555   }
2556   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2557   PetscCall(VecRestoreArrayWrite(v, &a));
2558   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2559   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2560   PetscCall(VecDestroy(&diagV));
2561   PetscCall(VecDestroy(&offdiagV));
2562   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
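
/*
  A minimal usage sketch, assuming `A` is an assembled MATMPIAIJ matrix (placeholder name):

    PetscCount nz;
    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %" PetscInt64_FMT "\n", (PetscInt64)nz));
*/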
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2654 {
2655   PetscFunctionBegin;
2656   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
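
/*
  A minimal usage sketch, assuming `A` is a MATMPIAIJ matrix (placeholder name); the same switch
  can be flipped from the command line with -mat_increase_overlap_scalable, see
  MatSetFromOptions_MPIAIJ() below:

    PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
*/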
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2725                                        MatGetRow_MPIAIJ,
2726                                        MatRestoreRow_MPIAIJ,
2727                                        MatMult_MPIAIJ,
2728                                        /* 4*/ MatMultAdd_MPIAIJ,
2729                                        MatMultTranspose_MPIAIJ,
2730                                        MatMultTransposeAdd_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        /*10*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        MatSOR_MPIAIJ,
2738                                        MatTranspose_MPIAIJ,
2739                                        /*15*/ MatGetInfo_MPIAIJ,
2740                                        MatEqual_MPIAIJ,
2741                                        MatGetDiagonal_MPIAIJ,
2742                                        MatDiagonalScale_MPIAIJ,
2743                                        MatNorm_MPIAIJ,
2744                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2745                                        MatAssemblyEnd_MPIAIJ,
2746                                        MatSetOption_MPIAIJ,
2747                                        MatZeroEntries_MPIAIJ,
2748                                        /*24*/ MatZeroRows_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        /*29*/ MatSetUp_MPI_Hash,
2754                                        NULL,
2755                                        NULL,
2756                                        MatGetDiagonalBlock_MPIAIJ,
2757                                        NULL,
2758                                        /*34*/ MatDuplicate_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        /*39*/ MatAXPY_MPIAIJ,
2764                                        MatCreateSubMatrices_MPIAIJ,
2765                                        MatIncreaseOverlap_MPIAIJ,
2766                                        MatGetValues_MPIAIJ,
2767                                        MatCopy_MPIAIJ,
2768                                        /*44*/ MatGetRowMax_MPIAIJ,
2769                                        MatScale_MPIAIJ,
2770                                        MatShift_MPIAIJ,
2771                                        MatDiagonalSet_MPIAIJ,
2772                                        MatZeroRowsColumns_MPIAIJ,
2773                                        /*49*/ MatSetRandom_MPIAIJ,
2774                                        MatGetRowIJ_MPIAIJ,
2775                                        MatRestoreRowIJ_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2779                                        NULL,
2780                                        MatSetUnfactored_MPIAIJ,
2781                                        MatPermute_MPIAIJ,
2782                                        NULL,
2783                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2784                                        MatDestroy_MPIAIJ,
2785                                        MatView_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        /*64*/ NULL,
2789                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2794                                        MatGetRowMinAbs_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        /*75*/ MatFDColoringApply_AIJ,
2800                                        MatSetFromOptions_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        MatFindZeroDiagonals_MPIAIJ,
2804                                        /*80*/ NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*83*/ MatLoad_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        /*89*/ NULL,
2814                                        NULL,
2815                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        MatBindToCPU_MPIAIJ,
2823                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        MatConjugate_MPIAIJ,
2827                                        NULL,
2828                                        /*104*/ MatSetValuesRow_MPIAIJ,
2829                                        MatRealPart_MPIAIJ,
2830                                        MatImaginaryPart_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        /*109*/ NULL,
2834                                        NULL,
2835                                        MatGetRowMin_MPIAIJ,
2836                                        NULL,
2837                                        MatMissingDiagonal_MPIAIJ,
2838                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2839                                        NULL,
2840                                        MatGetGhosts_MPIAIJ,
2841                                        NULL,
2842                                        NULL,
2843                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                        MatGetMultiProcBlock_MPIAIJ,
2848                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2849                                        MatGetColumnReductions_MPIAIJ,
2850                                        MatInvertBlockDiagonal_MPIAIJ,
2851                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2852                                        MatCreateSubMatricesMPI_MPIAIJ,
2853                                        /*129*/ NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2857                                        NULL,
2858                                        /*134*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        NULL,
2863                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2864                                        NULL,
2865                                        NULL,
2866                                        MatFDColoringSetUp_MPIXAIJ,
2867                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2868                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2869                                        /*145*/ NULL,
2870                                        NULL,
2871                                        NULL,
2872                                        MatCreateGraph_Simple_AIJ,
2873                                        NULL,
2874                                        /*150*/ NULL,
2875                                        MatEliminateZeros_MPIAIJ,
2876                                        MatGetRowSumAbs_MPIAIJ,
2877                                        NULL,
2878                                        NULL,
2879                                        /*155*/ NULL,
2880                                        MatCopyHashToXAIJ_MPI_Hash};
2881 
2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2883 {
2884   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2885 
2886   PetscFunctionBegin;
2887   PetscCall(MatStoreValues(aij->A));
2888   PetscCall(MatStoreValues(aij->B));
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2893 {
2894   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2895 
2896   PetscFunctionBegin;
2897   PetscCall(MatRetrieveValues(aij->A));
2898   PetscCall(MatRetrieveValues(aij->B));
2899   PetscFunctionReturn(PETSC_SUCCESS);
2900 }
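
/*
  A minimal sketch of the store/retrieve pattern these two kernels implement, assuming `A` is an
  assembled MATMPIAIJ matrix (placeholder) whose nonzero pattern does not change:

    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
    PetscCall(MatStoreValues(A));    // stash a copy of the numerical values
    // ... overwrite the values of A, e.g. while building a preconditioner ...
    PetscCall(MatRetrieveValues(A)); // restore the stashed values
*/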
2901 
2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2903 {
2904   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2905   PetscMPIInt size;
2906 
2907   PetscFunctionBegin;
2908   if (B->hash_active) {
2909     B->ops[0]      = b->cops;
2910     B->hash_active = PETSC_FALSE;
2911   }
2912   PetscCall(PetscLayoutSetUp(B->rmap));
2913   PetscCall(PetscLayoutSetUp(B->cmap));
2914 
2915 #if defined(PETSC_USE_CTABLE)
2916   PetscCall(PetscHMapIDestroy(&b->colmap));
2917 #else
2918   PetscCall(PetscFree(b->colmap));
2919 #endif
2920   PetscCall(PetscFree(b->garray));
2921   PetscCall(VecDestroy(&b->lvec));
2922   PetscCall(VecScatterDestroy(&b->Mvctx));
2923 
2924   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2925 
2926   MatSeqXAIJGetOptions_Private(b->B);
2927   PetscCall(MatDestroy(&b->B));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2929   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2930   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2931   PetscCall(MatSetType(b->B, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->B);
2933 
2934   MatSeqXAIJGetOptions_Private(b->A);
2935   PetscCall(MatDestroy(&b->A));
2936   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2937   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2938   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2939   PetscCall(MatSetType(b->A, MATSEQAIJ));
2940   MatSeqXAIJRestoreOptions_Private(b->A);
2941 
2942   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2943   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2944   B->preallocated  = PETSC_TRUE;
2945   B->was_assembled = PETSC_FALSE;
2946   B->assembled     = PETSC_FALSE;
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
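
/*
  A minimal preallocation sketch using the public MatMPIAIJSetPreallocation() interface that
  dispatches to the routine above; the global sizes M, N and the per-row estimates (5 diagonal
  and 2 off-diagonal nonzeros) are placeholders:

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/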
2949 
2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2953   /* Save the nonzero states of the component matrices because those are what are used to determine
2954     the nonzero state of mat */
2955   PetscObjectState diagstate = b->A->nonzerostate, offdiagstate = b->B->nonzerostate;
2956 
2957   PetscFunctionBegin;
2958   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2959   PetscCall(PetscLayoutSetUp(B->rmap));
2960   PetscCall(PetscLayoutSetUp(B->cmap));
2961   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2962   else {
2963 #if defined(PETSC_USE_CTABLE)
2964     PetscCall(PetscHMapIDestroy(&b->colmap));
2965 #else
2966     PetscCall(PetscFree(b->colmap));
2967 #endif
2968     PetscCall(PetscFree(b->garray));
2969     PetscCall(VecDestroy(&b->lvec));
2970   }
2971   PetscCall(VecScatterDestroy(&b->Mvctx));
2972 
2973   PetscCall(MatResetPreallocation(b->A));
2974   PetscCall(MatResetPreallocation(b->B));
2975   B->preallocated    = PETSC_TRUE;
2976   B->was_assembled   = PETSC_FALSE;
2977   B->assembled       = PETSC_FALSE;
2978   b->A->nonzerostate = ++diagstate, b->B->nonzerostate = ++offdiagstate;
2979   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2980   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2981   PetscFunctionReturn(PETSC_SUCCESS);
2982 }
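
/*
  A minimal sketch of resetting preallocation through the public MatResetPreallocation()
  interface, assuming `A` is a previously preallocated (and possibly assembled) MATMPIAIJ matrix
  whose entries will be reinserted from scratch:

    PetscCall(MatResetPreallocation(A));
    // ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() as for a freshly preallocated matrix ...
*/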
2983 
2984 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2985 {
2986   Mat         mat;
2987   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2988 
2989   PetscFunctionBegin;
2990   *newmat = NULL;
2991   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2992   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2993   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2994   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2995   a = (Mat_MPIAIJ *)mat->data;
2996 
2997   mat->factortype = matin->factortype;
2998   mat->assembled  = matin->assembled;
2999   mat->insertmode = NOT_SET_VALUES;
3000 
3001   a->size         = oldmat->size;
3002   a->rank         = oldmat->rank;
3003   a->donotstash   = oldmat->donotstash;
3004   a->roworiented  = oldmat->roworiented;
3005   a->rowindices   = NULL;
3006   a->rowvalues    = NULL;
3007   a->getrowactive = PETSC_FALSE;
3008 
3009   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3010   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3011   if (matin->hash_active) {
3012     PetscCall(MatSetUp(mat));
3013   } else {
3014     mat->preallocated = matin->preallocated;
3015     if (oldmat->colmap) {
3016 #if defined(PETSC_USE_CTABLE)
3017       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3018 #else
3019       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3020       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3021 #endif
3022     } else a->colmap = NULL;
3023     if (oldmat->garray) {
3024       PetscInt len;
3025       len = oldmat->B->cmap->n;
3026       PetscCall(PetscMalloc1(len + 1, &a->garray));
3027       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3028     } else a->garray = NULL;
3029 
3030     /* It may happen that MatDuplicate is called with a non-assembled matrix;
3031       in fact, MatDuplicate only requires the matrix to be preallocated.
3032       This may happen inside a DMCreateMatrix_Shell */
3033     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3034     if (oldmat->Mvctx) {
3035       a->Mvctx = oldmat->Mvctx;
3036       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3037     }
3038     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3039     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3040   }
3041   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3042   *newmat = mat;
3043   PetscFunctionReturn(PETSC_SUCCESS);
3044 }
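
/*
  A minimal usage sketch of the public MatDuplicate() interface implemented above, assuming `A`
  is an (at least preallocated) MATMPIAIJ matrix:

    Mat Acopy;
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &Acopy)); // or MAT_DO_NOT_COPY_VALUES / MAT_SHARE_NONZERO_PATTERN
    PetscCall(MatDestroy(&Acopy));
*/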
3045 
3046 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3047 {
3048   PetscBool isbinary, ishdf5;
3049 
3050   PetscFunctionBegin;
3051   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3052   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3053   /* force binary viewer to load .info file if it has not yet done so */
3054   PetscCall(PetscViewerSetUp(viewer));
3055   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3056   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3057   if (isbinary) {
3058     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3059   } else if (ishdf5) {
3060 #if defined(PETSC_HAVE_HDF5)
3061     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3062 #else
3063     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3064 #endif
3065   } else {
3066     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3067   }
3068   PetscFunctionReturn(PETSC_SUCCESS);
3069 }
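
/*
  A minimal loading sketch, assuming "matrix.dat" (placeholder file name) was previously written
  with MatView() and a binary viewer:

    Mat         A;
    PetscViewer viewer;
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
*/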
3070 
3071 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3072 {
3073   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3074   PetscInt    *rowidxs, *colidxs;
3075   PetscScalar *matvals;
3076 
3077   PetscFunctionBegin;
3078   PetscCall(PetscViewerSetUp(viewer));
3079 
3080   /* read in matrix header */
3081   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3082   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3083   M  = header[1];
3084   N  = header[2];
3085   nz = header[3];
3086   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3087   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3088   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3089 
3090   /* set block sizes from the viewer's .info file */
3091   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3092   /* set global sizes if not set already */
3093   if (mat->rmap->N < 0) mat->rmap->N = M;
3094   if (mat->cmap->N < 0) mat->cmap->N = N;
3095   PetscCall(PetscLayoutSetUp(mat->rmap));
3096   PetscCall(PetscLayoutSetUp(mat->cmap));
3097 
3098   /* check if the matrix sizes are correct */
3099   PetscCall(MatGetSize(mat, &rows, &cols));
3100   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3101 
3102   /* read in row lengths and build row indices */
3103   PetscCall(MatGetLocalSize(mat, &m, NULL));
3104   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3105   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3106   rowidxs[0] = 0;
3107   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3108   if (nz != PETSC_INT_MAX) {
3109     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3110     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3111   }
3112 
3113   /* read in column indices and matrix values */
3114   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3115   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3116   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3117   /* store matrix indices and values */
3118   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3119   PetscCall(PetscFree(rowidxs));
3120   PetscCall(PetscFree2(colidxs, matvals));
3121   PetscFunctionReturn(PETSC_SUCCESS);
3122 }
3123 
3124 /* Not scalable because of ISAllGather() unless getting all columns. */
3125 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3126 {
3127   IS          iscol_local;
3128   PetscBool   isstride;
3129   PetscMPIInt gisstride = 0;
3130 
3131   PetscFunctionBegin;
3132   /* check if we are grabbing all columns*/
3133   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3134 
3135   if (isstride) {
3136     PetscInt start, len, mstart, mlen;
3137     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3138     PetscCall(ISGetLocalSize(iscol, &len));
3139     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3140     if (mstart == start && mlen - mstart == len) gisstride = 1;
3141   }
3142 
3143   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3144   if (gisstride) {
3145     PetscInt N;
3146     PetscCall(MatGetSize(mat, NULL, &N));
3147     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3148     PetscCall(ISSetIdentity(iscol_local));
3149     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3150   } else {
3151     PetscInt cbs;
3152     PetscCall(ISGetBlockSize(iscol, &cbs));
3153     PetscCall(ISAllGather(iscol, &iscol_local));
3154     PetscCall(ISSetBlockSize(iscol_local, cbs));
3155   }
3156 
3157   *isseq = iscol_local;
3158   PetscFunctionReturn(PETSC_SUCCESS);
3159 }
3160 
3161 /*
3162  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3163  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3164 
3165  Input Parameters:
3166 +   mat - matrix
3167 +   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3168            i.e., mat->rstart <= isrow[i] < mat->rend
3169 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3170            i.e., mat->cstart <= iscol[i] < mat->cend
3171 
3172  Output Parameters:
3173 +   isrow_d - sequential row index set for retrieving mat->A
3174 .   iscol_d - sequential column index set for retrieving mat->A
3175 .   iscol_o - sequential column index set for retrieving mat->B
3176 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3177  */
3178 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3179 {
3180   Vec             x, cmap;
3181   const PetscInt *is_idx;
3182   PetscScalar    *xarray, *cmaparray;
3183   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3184   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3185   Mat             B    = a->B;
3186   Vec             lvec = a->lvec, lcmap;
3187   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3188   MPI_Comm        comm;
3189   VecScatter      Mvctx = a->Mvctx;
3190 
3191   PetscFunctionBegin;
3192   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3193   PetscCall(ISGetLocalSize(iscol, &ncols));
3194 
3195   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3196   PetscCall(MatCreateVecs(mat, &x, NULL));
3197   PetscCall(VecSet(x, -1.0));
3198   PetscCall(VecDuplicate(x, &cmap));
3199   PetscCall(VecSet(cmap, -1.0));
3200 
3201   /* Get start indices */
3202   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3203   isstart -= ncols;
3204   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3205 
3206   PetscCall(ISGetIndices(iscol, &is_idx));
3207   PetscCall(VecGetArray(x, &xarray));
3208   PetscCall(VecGetArray(cmap, &cmaparray));
3209   PetscCall(PetscMalloc1(ncols, &idx));
3210   for (i = 0; i < ncols; i++) {
3211     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3212     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3213     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3214   }
3215   PetscCall(VecRestoreArray(x, &xarray));
3216   PetscCall(VecRestoreArray(cmap, &cmaparray));
3217   PetscCall(ISRestoreIndices(iscol, &is_idx));
3218 
3219   /* Get iscol_d */
3220   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3221   PetscCall(ISGetBlockSize(iscol, &i));
3222   PetscCall(ISSetBlockSize(*iscol_d, i));
3223 
3224   /* Get isrow_d */
3225   PetscCall(ISGetLocalSize(isrow, &m));
3226   rstart = mat->rmap->rstart;
3227   PetscCall(PetscMalloc1(m, &idx));
3228   PetscCall(ISGetIndices(isrow, &is_idx));
3229   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3230   PetscCall(ISRestoreIndices(isrow, &is_idx));
3231 
3232   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3233   PetscCall(ISGetBlockSize(isrow, &i));
3234   PetscCall(ISSetBlockSize(*isrow_d, i));
3235 
3236   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3237   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3238   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3239 
3240   PetscCall(VecDuplicate(lvec, &lcmap));
3241 
3242   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3243   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3244 
3245   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3246   /* off-process column indices */
3247   count = 0;
3248   PetscCall(PetscMalloc1(Bn, &idx));
3249   PetscCall(PetscMalloc1(Bn, &cmap1));
3250 
3251   PetscCall(VecGetArray(lvec, &xarray));
3252   PetscCall(VecGetArray(lcmap, &cmaparray));
3253   for (i = 0; i < Bn; i++) {
3254     if (PetscRealPart(xarray[i]) > -1.0) {
3255       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3256       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3257       count++;
3258     }
3259   }
3260   PetscCall(VecRestoreArray(lvec, &xarray));
3261   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3262 
3263   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3264   /* cannot ensure iscol_o has same blocksize as iscol! */
3265 
3266   PetscCall(PetscFree(idx));
3267   *garray = cmap1;
3268 
3269   PetscCall(VecDestroy(&x));
3270   PetscCall(VecDestroy(&cmap));
3271   PetscCall(VecDestroy(&lcmap));
3272   PetscFunctionReturn(PETSC_SUCCESS);
3273 }
3274 
3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3277 {
3278   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3279   Mat         M = NULL;
3280   MPI_Comm    comm;
3281   IS          iscol_d, isrow_d, iscol_o;
3282   Mat         Asub = NULL, Bsub = NULL;
3283   PetscInt    n, count, M_size, N_size;
3284 
3285   PetscFunctionBegin;
3286   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3287 
3288   if (call == MAT_REUSE_MATRIX) {
3289     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3290     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3291     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3292 
3293     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3294     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3295 
3296     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3297     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3298 
3299     /* Update diagonal and off-diagonal portions of submat */
3300     asub = (Mat_MPIAIJ *)(*submat)->data;
3301     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3302     PetscCall(ISGetLocalSize(iscol_o, &n));
3303     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3304     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3305     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3306 
3307   } else { /* call == MAT_INITIAL_MATRIX */
3308     PetscInt *garray, *garray_compact;
3309     PetscInt  BsubN;
3310 
3311     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3312     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3313 
3314     /* Create local submatrices Asub and Bsub */
3315     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3316     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3317 
3318     // Compact garray so it's not of size Bn
3319     PetscCall(ISGetSize(iscol_o, &count));
3320     PetscCall(PetscMalloc1(count, &garray_compact));
3321     PetscCall(PetscArraycpy(garray_compact, garray, count));
3322 
3323     /* Create submatrix M */
3324     PetscCall(ISGetSize(isrow, &M_size));
3325     PetscCall(ISGetSize(iscol, &N_size));
3326     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3327 
3328     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3329     asub = (Mat_MPIAIJ *)M->data;
3330 
3331     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3332     n = asub->B->cmap->N;
3333     if (BsubN > n) {
3334       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3335       const PetscInt *idx;
3336       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3337       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3338 
3339       PetscCall(PetscMalloc1(n, &idx_new));
3340       j = 0;
3341       PetscCall(ISGetIndices(iscol_o, &idx));
3342       for (i = 0; i < n; i++) {
3343         if (j >= BsubN) break;
3344         while (subgarray[i] > garray[j]) j++;
3345 
3346         if (subgarray[i] == garray[j]) {
3347           idx_new[i] = idx[j++];
3348         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3349       }
3350       PetscCall(ISRestoreIndices(iscol_o, &idx));
3351 
3352       PetscCall(ISDestroy(&iscol_o));
3353       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3354 
3355     } else if (BsubN < n) {
3356       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3357     }
3358 
3359     PetscCall(PetscFree(garray));
3360     *submat = M;
3361 
3362     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3363     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3364     PetscCall(ISDestroy(&isrow_d));
3365 
3366     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3367     PetscCall(ISDestroy(&iscol_d));
3368 
3369     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3370     PetscCall(ISDestroy(&iscol_o));
3371   }
3372   PetscFunctionReturn(PETSC_SUCCESS);
3373 }
3374 
3375 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3376 {
3377   IS        iscol_local = NULL, isrow_d;
3378   PetscInt  csize;
3379   PetscInt  n, i, j, start, end;
3380   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3381   MPI_Comm  comm;
3382 
3383   PetscFunctionBegin;
3384   /* If isrow has same processor distribution as mat,
3385      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3386   if (call == MAT_REUSE_MATRIX) {
3387     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3388     if (isrow_d) {
3389       sameRowDist  = PETSC_TRUE;
3390       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3391     } else {
3392       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3393       if (iscol_local) {
3394         sameRowDist  = PETSC_TRUE;
3395         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3396       }
3397     }
3398   } else {
3399     /* Check if isrow has same processor distribution as mat */
3400     sameDist[0] = PETSC_FALSE;
3401     PetscCall(ISGetLocalSize(isrow, &n));
3402     if (!n) {
3403       sameDist[0] = PETSC_TRUE;
3404     } else {
3405       PetscCall(ISGetMinMax(isrow, &i, &j));
3406       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3407       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3408     }
3409 
3410     /* Check if iscol has same processor distribution as mat */
3411     sameDist[1] = PETSC_FALSE;
3412     PetscCall(ISGetLocalSize(iscol, &n));
3413     if (!n) {
3414       sameDist[1] = PETSC_TRUE;
3415     } else {
3416       PetscCall(ISGetMinMax(iscol, &i, &j));
3417       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3418       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3419     }
3420 
3421     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3422     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3423     sameRowDist = tsameDist[0];
3424   }
3425 
3426   if (sameRowDist) {
3427     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3428       /* isrow and iscol have same processor distribution as mat */
3429       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3430       PetscFunctionReturn(PETSC_SUCCESS);
3431     } else { /* sameRowDist */
3432       /* isrow has same processor distribution as mat */
3433       if (call == MAT_INITIAL_MATRIX) {
3434         PetscBool sorted;
3435         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3436         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3437         PetscCall(ISGetSize(iscol, &i));
3438         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3439 
3440         PetscCall(ISSorted(iscol_local, &sorted));
3441         if (sorted) {
3442           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3443           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3444           PetscFunctionReturn(PETSC_SUCCESS);
3445         }
3446       } else { /* call == MAT_REUSE_MATRIX */
3447         IS iscol_sub;
3448         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3449         if (iscol_sub) {
3450           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3451           PetscFunctionReturn(PETSC_SUCCESS);
3452         }
3453       }
3454     }
3455   }
3456 
3457   /* General case: iscol -> iscol_local which has global size of iscol */
3458   if (call == MAT_REUSE_MATRIX) {
3459     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3460     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3461   } else {
3462     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3463   }
3464 
3465   PetscCall(ISGetLocalSize(iscol, &csize));
3466   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3467 
3468   if (call == MAT_INITIAL_MATRIX) {
3469     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3470     PetscCall(ISDestroy(&iscol_local));
3471   }
3472   PetscFunctionReturn(PETSC_SUCCESS);
3473 }
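
/*
  A minimal usage sketch of the public MatCreateSubMatrix() interface that reaches the routine
  above for MATMPIAIJ; `A`, `isrow`, and `iscol` are placeholders for an assembled matrix and
  parallel index sets selecting the wanted rows and columns:

    Mat sub;
    PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
    // ... later, with the same index sets, refresh the values in place ...
    PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &sub));
*/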
3474 
3475 /*@C
3476   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3477   and "off-diagonal" part of the matrix in CSR format.
3478 
3479   Collective
3480 
3481   Input Parameters:
3482 + comm   - MPI communicator
3483 . M      - the global row size
3484 . N      - the global column size
3485 . A      - "diagonal" portion of matrix
3486 . B      - "off-diagonal" portion of the matrix; if garray is `NULL`, B should use global col ids and have N columns; if garray is not `NULL`, B should use local col ids and have as many columns as entries in garray
3487 - garray - either `NULL` or the global index of `B` columns
3488 
3489   Output Parameter:
3490 . mat - the matrix, with input `A` as its local diagonal matrix
3491 
3492   Level: advanced
3493 
3494   Notes:
3495   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3496 
3497   `A` and `B` become part of the output mat. The user cannot use `A` and `B` anymore.
3498 
3499 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3500 @*/
3501 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3502 {
3503   PetscInt m, n;
3504   MatType  mpi_mat_type;
3505 
3506   PetscFunctionBegin;
3507   PetscCall(MatCreate(comm, mat));
3508   PetscCall(MatGetSize(A, &m, &n));
3509   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3510   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3511 
3512   PetscCall(MatSetSizes(*mat, m, n, M, N));
3513   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3514   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3515   PetscCall(MatSetType(*mat, mpi_mat_type));
3516 
3517   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3518 
3519   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3520   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3521   PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray));
3522   PetscFunctionReturn(PETSC_SUCCESS);
3523 }
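
/*
  A minimal sketch of assembling a MATMPIAIJ from already built sequential pieces, assuming
  `Adiag` and `Boffdiag` are MATSEQAIJ matrices with the local row count of the target matrix
  and `garray` maps the columns of `Boffdiag` to global column ids (all placeholder names); both
  sequential matrices are taken over by the result and must not be used afterwards:

    Mat C;
    PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, Adiag, Boffdiag, garray, &C));
*/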
3524 
3525 /*
3526   MatSetMPIAIJWithSplitSeqAIJ - Set the diag and offdiag matrices of a `MATMPIAIJ` matrix.
3527    It is similar to `MatCreateMPIAIJWithSplitArrays()`. This routine allows passing in
3528    B with local indices and the correct size, along with the accompanying
3529    garray, hence skipping compactification.
3530 
3531   Collective
3532 
3533   Input Parameters:
3534 +  mat    - the MATMPIAIJ matrix, which should have its type and layout set, but should not have its diag, offdiag matrices set
3535 .  A      - the diag matrix using local col ids
3536 .  B      - "off-diagonal" portion of the matrix; if garray is `NULL`, B should use global col ids and have N columns; if garray is not `NULL`, B should use local col ids and have as many columns as entries in garray
3537 -  garray - either `NULL` or the global index of `B` columns
3538 
3539   Output Parameter:
3540 .  mat   - the updated `MATMPIAIJ` matrix
3541 
3542   Level: advanced
3543 
3544   Notes:
3545   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3546 
3547   `A` and `B` become part of output mat. The user cannot use `A` and `B` anymore.
3548 
3549 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3550 */
3551 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray)
3552 {
3553   PetscFunctionBegin;
3554   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
3555   PetscInt    m, n, M, N, Am, An, Bm, Bn;
3556 
3557   PetscCall(MatGetSize(mat, &M, &N));
3558   PetscCall(MatGetLocalSize(mat, &m, &n));
3559   PetscCall(MatGetLocalSize(A, &Am, &An));
3560   PetscCall(MatGetLocalSize(B, &Bm, &Bn));
3561 
3562   PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local numbers of rows do not match");
3563   PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local numbers of columns do not match");
3564   PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty");
3565   mpiaij->A      = A;
3566   mpiaij->B      = B;
3567   mpiaij->garray = garray;
3568 
3569   mat->preallocated     = PETSC_TRUE;
3570   mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3571 
3572   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3573   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3574   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and
3575    also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
3576    */
3577   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3578   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3579   PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3580   PetscFunctionReturn(PETSC_SUCCESS);
3581 }
3582 
3583 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3584 
3585 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3586 {
3587   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3588   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3589   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3590   Mat             M, Msub, B = a->B;
3591   MatScalar      *aa;
3592   Mat_SeqAIJ     *aij;
3593   PetscInt       *garray = a->garray, *colsub, Ncols;
3594   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3595   IS              iscol_sub, iscmap;
3596   const PetscInt *is_idx, *cmap;
3597   PetscBool       allcolumns = PETSC_FALSE;
3598   MPI_Comm        comm;
3599 
3600   PetscFunctionBegin;
3601   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3602   if (call == MAT_REUSE_MATRIX) {
3603     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3604     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3605     PetscCall(ISGetLocalSize(iscol_sub, &count));
3606 
3607     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3608     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3609 
3610     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3611     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3612 
3613     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3614 
3615   } else { /* call == MAT_INITIAL_MATRIX */
3616     PetscBool flg;
3617 
3618     PetscCall(ISGetLocalSize(iscol, &n));
3619     PetscCall(ISGetSize(iscol, &Ncols));
3620 
3621     /* (1) iscol -> nonscalable iscol_local */
3622     /* Check for special case: each processor gets entire matrix columns */
3623     PetscCall(ISIdentity(iscol_local, &flg));
3624     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3625     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3626     if (allcolumns) {
3627       iscol_sub = iscol_local;
3628       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3629       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3630 
3631     } else {
3632       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted; it can have duplicate indices */
3633       PetscInt *idx, *cmap1, k;
3634       PetscCall(PetscMalloc1(Ncols, &idx));
3635       PetscCall(PetscMalloc1(Ncols, &cmap1));
3636       PetscCall(ISGetIndices(iscol_local, &is_idx));
3637       count = 0;
3638       k     = 0;
3639       for (i = 0; i < Ncols; i++) {
3640         j = is_idx[i];
3641         if (j >= cstart && j < cend) {
3642           /* diagonal part of mat */
3643           idx[count]     = j;
3644           cmap1[count++] = i; /* column index in submat */
3645         } else if (Bn) {
3646           /* off-diagonal part of mat */
3647           if (j == garray[k]) {
3648             idx[count]     = j;
3649             cmap1[count++] = i; /* column index in submat */
3650           } else if (j > garray[k]) {
3651             while (j > garray[k] && k < Bn - 1) k++;
3652             if (j == garray[k]) {
3653               idx[count]     = j;
3654               cmap1[count++] = i; /* column index in submat */
3655             }
3656           }
3657         }
3658       }
3659       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3660 
3661       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3662       PetscCall(ISGetBlockSize(iscol, &cbs));
3663       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3664 
3665       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3666     }
3667 
3668     /* (3) Create sequential Msub */
3669     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3670   }
3671 
3672   PetscCall(ISGetLocalSize(iscol_sub, &count));
3673   aij = (Mat_SeqAIJ *)Msub->data;
3674   ii  = aij->i;
3675   PetscCall(ISGetIndices(iscmap, &cmap));
3676 
3677   /*
3678       m - number of local rows
3679       Ncols - number of columns (same on all processors)
3680       rstart - first row in new global matrix generated
3681   */
3682   PetscCall(MatGetSize(Msub, &m, NULL));
3683 
3684   if (call == MAT_INITIAL_MATRIX) {
3685     /* (4) Create parallel newmat */
3686     PetscMPIInt rank, size;
3687     PetscInt    csize;
3688 
3689     PetscCallMPI(MPI_Comm_size(comm, &size));
3690     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3691 
3692     /*
3693         Determine the number of non-zeros in the diagonal and off-diagonal
3694         portions of the matrix in order to do correct preallocation
3695     */
3696 
3697     /* first get start and end of "diagonal" columns */
3698     PetscCall(ISGetLocalSize(iscol, &csize));
3699     if (csize == PETSC_DECIDE) {
3700       PetscCall(ISGetSize(isrow, &mglobal));
3701       if (mglobal == Ncols) { /* square matrix */
3702         nlocal = m;
3703       } else {
3704         nlocal = Ncols / size + ((Ncols % size) > rank);
3705       }
3706     } else {
3707       nlocal = csize;
3708     }
3709     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3710     rstart = rend - nlocal;
3711     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3712 
3713     /* next, compute all the lengths */
3714     jj = aij->j;
3715     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3716     olens = dlens + m;
3717     for (i = 0; i < m; i++) {
3718       jend = ii[i + 1] - ii[i];
3719       olen = 0;
3720       dlen = 0;
3721       for (j = 0; j < jend; j++) {
3722         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3723         else dlen++;
3724         jj++;
3725       }
3726       olens[i] = olen;
3727       dlens[i] = dlen;
3728     }
3729 
3730     PetscCall(ISGetBlockSize(isrow, &bs));
3731     PetscCall(ISGetBlockSize(iscol, &cbs));
3732 
3733     PetscCall(MatCreate(comm, &M));
3734     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3735     PetscCall(MatSetBlockSizes(M, bs, cbs));
3736     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3737     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3738     PetscCall(PetscFree(dlens));
3739 
3740   } else { /* call == MAT_REUSE_MATRIX */
3741     M = *newmat;
3742     PetscCall(MatGetLocalSize(M, &i, NULL));
3743     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3744     PetscCall(MatZeroEntries(M));
3745     /*
3746          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3747        rather than the slower MatSetValues().
3748     */
3749     M->was_assembled = PETSC_TRUE;
3750     M->assembled     = PETSC_FALSE;
3751   }
3752 
3753   /* (5) Set values of Msub to *newmat */
3754   PetscCall(PetscMalloc1(count, &colsub));
3755   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3756 
3757   jj = aij->j;
3758   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3759   for (i = 0; i < m; i++) {
3760     row = rstart + i;
3761     nz  = ii[i + 1] - ii[i];
3762     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3763     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3764     jj += nz;
3765     aa += nz;
3766   }
3767   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3768   PetscCall(ISRestoreIndices(iscmap, &cmap));
3769 
3770   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3771   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3772 
3773   PetscCall(PetscFree(colsub));
3774 
3775   /* save Msub, iscol_sub and iscmap used in processor for next request */
3776   if (call == MAT_INITIAL_MATRIX) {
3777     *newmat = M;
3778     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3779     PetscCall(MatDestroy(&Msub));
3780 
3781     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3782     PetscCall(ISDestroy(&iscol_sub));
3783 
3784     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3785     PetscCall(ISDestroy(&iscmap));
3786 
3787     if (iscol_local) {
3788       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3789       PetscCall(ISDestroy(&iscol_local));
3790     }
3791   }
3792   PetscFunctionReturn(PETSC_SUCCESS);
3793 }
3794 
3795 /*
3796     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3797   process, and then the end result by concatenating the local matrices.
3798   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3799 
3800   This requires a sequential iscol with all indices.
3801 */
3802 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3803 {
3804   PetscMPIInt rank, size;
3805   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3806   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3807   Mat         M, Mreuse;
3808   MatScalar  *aa, *vwork;
3809   MPI_Comm    comm;
3810   Mat_SeqAIJ *aij;
3811   PetscBool   colflag, allcolumns = PETSC_FALSE;
3812 
3813   PetscFunctionBegin;
3814   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3815   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3816   PetscCallMPI(MPI_Comm_size(comm, &size));
3817 
3818   /* Check for special case: each processor gets entire matrix columns */
3819   PetscCall(ISIdentity(iscol, &colflag));
3820   PetscCall(ISGetLocalSize(iscol, &n));
3821   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3822   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3823 
3824   if (call == MAT_REUSE_MATRIX) {
3825     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3826     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3827     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3828   } else {
3829     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3830   }
3831 
3832   /*
3833       m - number of local rows
3834       n - number of columns (same on all processors)
3835       rstart - first row in new global matrix generated
3836   */
3837   PetscCall(MatGetSize(Mreuse, &m, &n));
3838   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3839   if (call == MAT_INITIAL_MATRIX) {
3840     aij = (Mat_SeqAIJ *)Mreuse->data;
3841     ii  = aij->i;
3842     jj  = aij->j;
3843 
3844     /*
3845         Determine the number of non-zeros in the diagonal and off-diagonal
3846         portions of the matrix in order to do correct preallocation
3847     */
3848 
3849     /* first get start and end of "diagonal" columns */
3850     if (csize == PETSC_DECIDE) {
3851       PetscCall(ISGetSize(isrow, &mglobal));
3852       if (mglobal == n) { /* square matrix */
3853         nlocal = m;
3854       } else {
3855         nlocal = n / size + ((n % size) > rank);
3856       }
3857     } else {
3858       nlocal = csize;
3859     }
3860     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3861     rstart = rend - nlocal;
3862     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3863 
3864     /* next, compute all the lengths */
3865     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3866     olens = dlens + m;
3867     for (i = 0; i < m; i++) {
3868       jend = ii[i + 1] - ii[i];
3869       olen = 0;
3870       dlen = 0;
3871       for (j = 0; j < jend; j++) {
3872         if (*jj < rstart || *jj >= rend) olen++;
3873         else dlen++;
3874         jj++;
3875       }
3876       olens[i] = olen;
3877       dlens[i] = dlen;
3878     }
3879     PetscCall(MatCreate(comm, &M));
3880     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3881     PetscCall(MatSetBlockSizes(M, bs, cbs));
3882     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3883     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3884     PetscCall(PetscFree(dlens));
3885   } else {
3886     PetscInt ml, nl;
3887 
3888     M = *newmat;
3889     PetscCall(MatGetLocalSize(M, &ml, &nl));
3890     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3891     PetscCall(MatZeroEntries(M));
3892     /*
3893          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3894        rather than the slower MatSetValues().
3895     */
3896     M->was_assembled = PETSC_TRUE;
3897     M->assembled     = PETSC_FALSE;
3898   }
3899   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3900   aij = (Mat_SeqAIJ *)Mreuse->data;
3901   ii  = aij->i;
3902   jj  = aij->j;
3903 
3904   /* trigger copy to CPU if needed */
3905   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3906   for (i = 0; i < m; i++) {
3907     row   = rstart + i;
3908     nz    = ii[i + 1] - ii[i];
3909     cwork = jj;
3910     jj    = PetscSafePointerPlusOffset(jj, nz);
3911     vwork = aa;
3912     aa    = PetscSafePointerPlusOffset(aa, nz);
3913     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3914   }
3915   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3916 
3917   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3918   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3919   *newmat = M;
3920 
3921   /* save submatrix used on this process for the next request */
3922   if (call == MAT_INITIAL_MATRIX) {
3923     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3924     PetscCall(MatDestroy(&Mreuse));
3925   }
3926   PetscFunctionReturn(PETSC_SUCCESS);
3927 }
3928 
3929 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3930 {
3931   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3932   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3933   const PetscInt *JJ;
3934   PetscBool       nooffprocentries;
3935   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3936 
3937   PetscFunctionBegin;
3938   PetscCall(PetscLayoutSetUp(B->rmap));
3939   PetscCall(PetscLayoutSetUp(B->cmap));
3940   m       = B->rmap->n;
3941   cstart  = B->cmap->rstart;
3942   cend    = B->cmap->rend;
3943   rstart  = B->rmap->rstart;
3944   irstart = Ii[0];
3945 
3946   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3947 
3948   if (PetscDefined(USE_DEBUG)) {
3949     for (i = 0; i < m; i++) {
3950       nnz = Ii[i + 1] - Ii[i];
3951       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3952       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3953       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3954       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3955     }
3956   }
3957 
3958   for (i = 0; i < m; i++) {
3959     nnz     = Ii[i + 1] - Ii[i];
3960     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3961     nnz_max = PetscMax(nnz_max, nnz);
3962     d       = 0;
3963     for (j = 0; j < nnz; j++) {
3964       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3965     }
3966     d_nnz[i] = d;
3967     o_nnz[i] = nnz - d;
3968   }
3969   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3970   PetscCall(PetscFree2(d_nnz, o_nnz));
3971 
3972   for (i = 0; i < m; i++) {
3973     ii = i + rstart;
3974     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3975   }
3976   nooffprocentries    = B->nooffprocentries;
3977   B->nooffprocentries = PETSC_TRUE;
3978   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3979   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3980   B->nooffprocentries = nooffprocentries;
3981 
3982   /* count number of entries below block diagonal */
3983   PetscCall(PetscFree(Aij->ld));
3984   PetscCall(PetscCalloc1(m, &ld));
3985   Aij->ld = ld;
3986   for (i = 0; i < m; i++) {
3987     nnz = Ii[i + 1] - Ii[i];
3988     j   = 0;
3989     while (j < nnz && J[j] < cstart) j++;
3990     ld[i] = j;
3991     if (J) J += nnz;
3992   }
3993 
3994   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3995   PetscFunctionReturn(PETSC_SUCCESS);
3996 }
3997 
3998 /*@
3999   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
4000   (the default parallel PETSc format).
4001 
4002   Collective
4003 
4004   Input Parameters:
4005 + B - the matrix
4006 . i - the indices into `j` for the start of each local row (indices start with zero)
4007 . j - the column indices for each local row (indices start with zero)
4008 - v - optional values in the matrix
4009 
4010   Level: developer
4011 
4012   Notes:
4013   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
4014   thus you CANNOT change the matrix entries by changing the values of `v` after you have
4015   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4016 
4017   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4018 
4019   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4020 
4021   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
4022 
4023   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
4024   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
4025 
4026   The format which is used for the sparse matrix input, is equivalent to a
4027   row-major ordering, i.e., for the following matrix, the input data expected is
4028   as shown
4029 .vb
4030         1 0 0
4031         2 0 3     P0
4032        -------
4033         4 5 6     P1
4034 
4035      Process0 [P0] rows_owned=[0,1]
4036         i =  {0,1,3}  [size = nrow+1  = 2+1]
4037         j =  {0,0,2}  [size = 3]
4038         v =  {1,2,3}  [size = 3]
4039 
4040      Process1 [P1] rows_owned=[2]
4041         i =  {0,3}    [size = nrow+1  = 1+1]
4042         j =  {0,1,2}  [size = 3]
4043         v =  {4,5,6}  [size = 3]
4044 .ve
4045 
4046 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4047           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4048 @*/
4049 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4050 {
4051   PetscFunctionBegin;
4052   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4053   PetscFunctionReturn(PETSC_SUCCESS);
4054 }
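
/*
   Illustrative sketch (not part of the library proper): preallocating and filling a MATMPIAIJ
   matrix from local CSR arrays with MatMPIAIJSetPreallocationCSR(). The arrays below are the
   piece owned by the rank holding rows 0 and 1 of the 3x3 example in the manual page above;
   other ranks would pass their own row slices. The routine also inserts the values and
   assembles the matrix, so no further MatSetValues()/MatAssembly calls are needed here.
*/
static PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm, Mat *B)
{
  const PetscInt    i[] = {0, 1, 3}, j[] = {0, 0, 2}; /* local rows 0..1 of the 3x3 example */
  const PetscScalar v[] = {1, 2, 3};

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, B));
  PetscCall(MatSetSizes(*B, 2, PETSC_DECIDE, 3, 3)); /* 2 local rows of a 3x3 global matrix */
  PetscCall(MatSetType(*B, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}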
4055 
4056 /*@
4057   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4058   (the default parallel PETSc format).  For good matrix assembly performance
4059   the user should preallocate the matrix storage by setting the parameters
4060   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4061 
4062   Collective
4063 
4064   Input Parameters:
4065 + B     - the matrix
4066 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4067            (same value is used for all local rows)
4068 . d_nnz - array containing the number of nonzeros in the various rows of the
4069            DIAGONAL portion of the local submatrix (possibly different for each row)
4070            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4071            The size of this array is equal to the number of local rows, i.e 'm'.
4072            For matrices that will be factored, you must leave room for (and set)
4073            the diagonal entry even if it is zero.
4074 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4075            submatrix (same value is used for all local rows).
4076 - o_nnz - array containing the number of nonzeros in the various rows of the
4077            OFF-DIAGONAL portion of the local submatrix (possibly different for
4078            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4079            structure. The size of this array is equal to the number
4080            of local rows, i.e 'm'.
4081 
4082   Example Usage:
4083   Consider the following 8x8 matrix with 34 non-zero values, that is
4084   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4085   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4086   as follows
4087 
4088 .vb
4089             1  2  0  |  0  3  0  |  0  4
4090     Proc0   0  5  6  |  7  0  0  |  8  0
4091             9  0 10  | 11  0  0  | 12  0
4092     -------------------------------------
4093            13  0 14  | 15 16 17  |  0  0
4094     Proc1   0 18  0  | 19 20 21  |  0  0
4095             0  0  0  | 22 23  0  | 24  0
4096     -------------------------------------
4097     Proc2  25 26 27  |  0  0 28  | 29  0
4098            30  0  0  | 31 32 33  |  0 34
4099 .ve
4100 
4101   This can be represented as a collection of submatrices as
4102 .vb
4103       A B C
4104       D E F
4105       G H I
4106 .ve
4107 
4108   Where the submatrices A,B,C are owned by proc0, D,E,F are
4109   owned by proc1, G,H,I are owned by proc2.
4110 
4111   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4112   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4113   The 'M','N' parameters are 8,8, and have the same values on all procs.
4114 
4115   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4116   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4117   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4118   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4119   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4120   matrix, and [DF] as another `MATSEQAIJ` matrix.
4121 
4122   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4123   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4124   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4125   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4126   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4127   In this case, the values of `d_nz`, `o_nz` are
4128 .vb
4129      proc0  dnz = 2, o_nz = 2
4130      proc1  dnz = 3, o_nz = 2
4131      proc2  dnz = 1, o_nz = 4
4132 .ve
4133   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4134   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4135   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4136   34 values.
4137 
4138   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4139   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4140   In the above case the values for `d_nnz`, `o_nnz` are
4141 .vb
4142      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4143      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4144      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4145 .ve
4146   Here the space allocated is the sum of all the above values, i.e., 34, and
4147   hence pre-allocation is perfect.
4148 
4149   Level: intermediate
4150 
4151   Notes:
4152   If the *_nnz parameter is given then the *_nz parameter is ignored
4153 
4154   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4155   storage.  The stored row and column indices begin with zero.
4156   See [Sparse Matrices](sec_matsparse) for details.
4157 
4158   The parallel matrix is partitioned such that the first m0 rows belong to
4159   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4160   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4161 
4162   The DIAGONAL portion of the local submatrix of a processor can be defined
4163   as the submatrix which is obtained by extracting the part corresponding to
4164   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4165   first row that belongs to the processor, r2 is the last row belonging to
4166   this processor, and c1-c2 is the range of indices of the local part of a
4167   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4168   common case of a square matrix, the row and column ranges are the same and
4169   the DIAGONAL part is also square. The remaining portion of the local
4170   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4171 
4172   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4173 
4174   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4175   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4176   You can also run with the option `-info` and look for messages with the string
4177   malloc in them to see if additional memory allocation was needed.
4178 
4179 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4180           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4181 @*/
4182 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4183 {
4184   PetscFunctionBegin;
4185   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4186   PetscValidType(B, 1);
4187   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4188   PetscFunctionReturn(PETSC_SUCCESS);
4189 }
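
/*
   Illustrative sketch (not part of the library proper): the MatCreate()/MatSetType()/
   MatMPIAIJSetPreallocation() sequence described above, using the per-row counts of the
   first three rows of the 8x8 example (d_nnz = {2,2,2}, o_nnz = {2,2,2}). Each rank would
   pass the counts for the rows it owns; values are inserted afterwards with MatSetValues()
   followed by the usual assembly calls.
*/
static PetscErrorCode ExamplePreallocateMPIAIJ(MPI_Comm comm, Mat *A)
{
  const PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2}; /* rows owned by proc0 in the example */

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, 3, 3, 8, 8)); /* this rank owns 3 rows and 3 "diagonal" columns */
  PetscCall(MatSetType(*A, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(*A, 0, d_nnz, 0, o_nnz));
  /* ... MatSetValues() for the owned rows, then MatAssemblyBegin()/MatAssemblyEnd() ... */
  PetscFunctionReturn(PETSC_SUCCESS);
}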
4190 
4191 /*@
4192   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4193   rows in standard CSR format.
4194 
4195   Collective
4196 
4197   Input Parameters:
4198 + comm - MPI communicator
4199 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4200 . n    - This value should be the same as the local size used in creating the
4201          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4202          calculated if `N` is given) For square matrices n is almost always `m`.
4203 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4204 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4205 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4206 . j    - global column indices
4207 - a    - optional matrix values
4208 
4209   Output Parameter:
4210 . mat - the matrix
4211 
4212   Level: intermediate
4213 
4214   Notes:
4215   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4216   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4217   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4218 
4219   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4220 
4221   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4222 
4223   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4224   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4225 
4226   The format which is used for the sparse matrix input, is equivalent to a
4227   row-major ordering, i.e., for the following matrix, the input data expected is
4228   as shown
4229 .vb
4230         1 0 0
4231         2 0 3     P0
4232        -------
4233         4 5 6     P1
4234 
4235      Process0 [P0] rows_owned=[0,1]
4236         i =  {0,1,3}  [size = nrow+1  = 2+1]
4237         j =  {0,0,2}  [size = 3]
4238         v =  {1,2,3}  [size = 3]
4239 
4240      Process1 [P1] rows_owned=[2]
4241         i =  {0,3}    [size = nrow+1  = 1+1]
4242         j =  {0,1,2}  [size = 3]
4243         v =  {4,5,6}  [size = 3]
4244 .ve
4245 
4246 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4247           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4248 @*/
4249 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4250 {
4251   PetscFunctionBegin;
4252   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4253   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4254   PetscCall(MatCreate(comm, mat));
4255   PetscCall(MatSetSizes(*mat, m, n, M, N));
4256   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4257   PetscCall(MatSetType(*mat, MATMPIAIJ));
4258   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4259   PetscFunctionReturn(PETSC_SUCCESS);
4260 }
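
/*
   Illustrative sketch (not part of the library proper): the two-rank 3x3 example from the
   manual page above, created in a single call with MatCreateMPIAIJWithArrays(). Assumes the
   communicator has exactly two ranks; error handling is kept minimal.
*/
static PetscErrorCode ExampleCreateMPIAIJWithArrays(MPI_Comm comm, Mat *A)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (rank == 0) { /* rank 0 owns rows 0 and 1 */
    const PetscInt    i[] = {0, 1, 3}, j[] = {0, 0, 2};
    const PetscScalar v[] = {1, 2, 3};
    PetscCall(MatCreateMPIAIJWithArrays(comm, 2, PETSC_DECIDE, 3, 3, i, j, v, A));
  } else { /* rank 1 owns row 2 */
    const PetscInt    i[] = {0, 3}, j[] = {0, 1, 2};
    const PetscScalar v[] = {4, 5, 6};
    PetscCall(MatCreateMPIAIJWithArrays(comm, 1, PETSC_DECIDE, 3, 3, i, j, v, A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}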
4261 
4262 /*@
4263   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
4264   rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those passed
4265   to `MatCreateMPIAIJWithArrays()`
4266 
4267   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4268 
4269   Collective
4270 
4271   Input Parameters:
4272 + mat - the matrix
4273 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4274 . n   - This value should be the same as the local size used in creating the
4275        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4276        calculated if N is given) For square matrices n is almost always m.
4277 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4278 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4279 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4280 . J   - column indices
4281 - v   - matrix values
4282 
4283   Level: deprecated
4284 
4285 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4286           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4287 @*/
4288 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4289 {
4290   PetscInt        nnz, i;
4291   PetscBool       nooffprocentries;
4292   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4293   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4294   PetscScalar    *ad, *ao;
4295   PetscInt        ldi, Iii, md;
4296   const PetscInt *Adi = Ad->i;
4297   PetscInt       *ld  = Aij->ld;
4298 
4299   PetscFunctionBegin;
4300   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4301   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4302   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4303   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4304 
4305   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4306   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4307 
4308   for (i = 0; i < m; i++) {
4309     if (PetscDefined(USE_DEBUG)) {
4310       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4311         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4312         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4313       }
4314     }
4315     nnz = Ii[i + 1] - Ii[i];
4316     Iii = Ii[i];
4317     ldi = ld[i];
4318     md  = Adi[i + 1] - Adi[i];
4319     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4320     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4321     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4322     ad += md;
4323     ao += nnz - md;
4324   }
4325   nooffprocentries      = mat->nooffprocentries;
4326   mat->nooffprocentries = PETSC_TRUE;
4327   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4328   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4329   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4330   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4331   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4332   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4333   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4334   mat->nooffprocentries = nooffprocentries;
4335   PetscFunctionReturn(PETSC_SUCCESS);
4336 }
4337 
4338 /*@
4339   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4340 
4341   Collective
4342 
4343   Input Parameters:
4344 + mat - the matrix
4345 - v   - matrix values, stored by row
4346 
4347   Level: intermediate
4348 
4349   Notes:
4350   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4351 
4352   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
4353 
4354 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4355           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4356 @*/
4357 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4358 {
4359   PetscInt        nnz, i, m;
4360   PetscBool       nooffprocentries;
4361   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4362   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4363   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4364   PetscScalar    *ad, *ao;
4365   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4366   PetscInt        ldi, Iii, md;
4367   PetscInt       *ld = Aij->ld;
4368 
4369   PetscFunctionBegin;
4370   m = mat->rmap->n;
4371 
4372   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4373   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4374   Iii = 0;
4375   for (i = 0; i < m; i++) {
4376     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4377     ldi = ld[i];
4378     md  = Adi[i + 1] - Adi[i];
4379     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4380     ad += md;
4381     if (ao) {
4382       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4383       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4384       ao += nnz - md;
4385     }
4386     Iii += nnz;
4387   }
4388   nooffprocentries      = mat->nooffprocentries;
4389   mat->nooffprocentries = PETSC_TRUE;
4390   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4391   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4392   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4393   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4394   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4395   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4396   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4397   mat->nooffprocentries = nooffprocentries;
4398   PetscFunctionReturn(PETSC_SUCCESS);
4399 }
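
/*
   Illustrative sketch (not part of the library proper): creating a matrix with
   MatCreateMPIAIJWithArrays() (sorted column indices) and then replacing only its numerical
   values with MatUpdateMPIAIJWithArray(). Each rank contributes one hypothetical row with
   entries in global columns 0 and 1.
*/
static PetscErrorCode ExampleCreateThenUpdate(MPI_Comm comm, Mat *A)
{
  const PetscInt    i[] = {0, 2}, j[] = {0, 1}; /* one local row, two sorted column indices */
  const PetscScalar v[] = {1.0, 2.0}, vnew[] = {3.0, 4.0};

  PetscFunctionBegin;
  PetscCall(MatCreateMPIAIJWithArrays(comm, 1, PETSC_DECIDE, PETSC_DETERMINE, 2, i, j, v, A));
  PetscCall(MatUpdateMPIAIJWithArray(*A, vnew)); /* same sparsity pattern, new values */
  PetscFunctionReturn(PETSC_SUCCESS);
}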
4400 
4401 /*@
4402   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4403   (the default parallel PETSc format).  For good matrix assembly performance
4404   the user should preallocate the matrix storage by setting the parameters
4405   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4406 
4407   Collective
4408 
4409   Input Parameters:
4410 + comm  - MPI communicator
4411 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4412           This value should be the same as the local size used in creating the
4413           y vector for the matrix-vector product y = Ax.
4414 . n     - This value should be the same as the local size used in creating the
4415           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4416           calculated if N is given) For square matrices n is almost always m.
4417 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4418 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4419 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4420           (same value is used for all local rows)
4421 . d_nnz - array containing the number of nonzeros in the various rows of the
4422           DIAGONAL portion of the local submatrix (possibly different for each row)
4423           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4424           The size of this array is equal to the number of local rows, i.e 'm'.
4425 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4426           submatrix (same value is used for all local rows).
4427 - o_nnz - array containing the number of nonzeros in the various rows of the
4428           OFF-DIAGONAL portion of the local submatrix (possibly different for
4429           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4430           structure. The size of this array is equal to the number
4431           of local rows, i.e 'm'.
4432 
4433   Output Parameter:
4434 . A - the matrix
4435 
4436   Options Database Keys:
4437 + -mat_no_inode                     - Do not use inodes
4438 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4439 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4440                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4441                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4442 
4443   Level: intermediate
4444 
4445   Notes:
4446   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4447   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4448   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4449 
4450   If the *_nnz parameter is given then the *_nz parameter is ignored
4451 
4452   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4453   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4454   storage requirements for this matrix.
4455 
4456   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4457   processor then it must be used on all processors that share the object for
4458   that argument.
4459 
4460   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4461   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4462 
4463   The user MUST specify either the local or global matrix dimensions
4464   (possibly both).
4465 
4466   The parallel matrix is partitioned across processors such that the
4467   first `m0` rows belong to process 0, the next `m1` rows belong to
4468   process 1, the next `m2` rows belong to process 2, etc., where
4469   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4470   values corresponding to [m x N] submatrix.
4471 
4472   The columns are logically partitioned with the n0 columns belonging
4473   to the 0th partition, the next n1 columns belonging to the next
4474   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4475 
4476   The DIAGONAL portion of the local submatrix on any given processor
4477   is the submatrix corresponding to the rows and columns m,n
4478   assigned to the given processor, i.e., the diagonal matrix on
4479   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4480   etc. The remaining portion of the local submatrix [m x (N-n)]
4481   constitute the OFF-DIAGONAL portion. The example below better
4482   illustrates this concept. The two matrices, the DIAGONAL portion and
4483   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4484 
4485   For a square global matrix we define each processor's diagonal portion
4486   to be its local rows and the corresponding columns (a square submatrix);
4487   each processor's off-diagonal portion encompasses the remainder of the
4488   local matrix (a rectangular submatrix).
4489 
4490   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4491 
4492   When calling this routine with a single process communicator, a matrix of
4493   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4494   type of communicator, use the construction mechanism
4495 .vb
4496   MatCreate(..., &A);
4497   MatSetType(A, MATMPIAIJ);
4498   MatSetSizes(A, m, n, M, N);
4499   MatMPIAIJSetPreallocation(A, ...);
4500 .ve
4501 
4502   By default, this format uses inodes (identical nodes) when possible.
4503   We search for consecutive rows with the same nonzero structure, thereby
4504   reusing matrix information to achieve increased efficiency.
4505 
4506   Example Usage:
4507   Consider the following 8x8 matrix with 34 non-zero values, that is
4508   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4509   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4510   as follows
4511 
4512 .vb
4513             1  2  0  |  0  3  0  |  0  4
4514     Proc0   0  5  6  |  7  0  0  |  8  0
4515             9  0 10  | 11  0  0  | 12  0
4516     -------------------------------------
4517            13  0 14  | 15 16 17  |  0  0
4518     Proc1   0 18  0  | 19 20 21  |  0  0
4519             0  0  0  | 22 23  0  | 24  0
4520     -------------------------------------
4521     Proc2  25 26 27  |  0  0 28  | 29  0
4522            30  0  0  | 31 32 33  |  0 34
4523 .ve
4524 
4525   This can be represented as a collection of submatrices as
4526 
4527 .vb
4528       A B C
4529       D E F
4530       G H I
4531 .ve
4532 
4533   Where the submatrices A,B,C are owned by proc0, D,E,F are
4534   owned by proc1, G,H,I are owned by proc2.
4535 
4536   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4537   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4538   The 'M','N' parameters are 8,8, and have the same values on all procs.
4539 
4540   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4541   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4542   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4543   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4544   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4545   matrix, and [DF] as another `MATSEQAIJ` matrix.
4546 
4547   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4548   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4549   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4550   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4551   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4552   In this case, the values of `d_nz`,`o_nz` are
4553 .vb
4554      proc0  dnz = 2, o_nz = 2
4555      proc1  dnz = 3, o_nz = 2
4556      proc2  dnz = 1, o_nz = 4
4557 .ve
4558   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4559   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4560   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4561   34 values.
4562 
4563   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4564   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4565   In the above case the values for d_nnz,o_nnz are
4566 .vb
4567      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4568      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4569      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4570 .ve
4571   Here the space allocated is the sum of all the above values, i.e., 34, and
4572   hence pre-allocation is perfect.
4573 
4574 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4575           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4576           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4577 @*/
4578 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4579 {
4580   PetscMPIInt size;
4581 
4582   PetscFunctionBegin;
4583   PetscCall(MatCreate(comm, A));
4584   PetscCall(MatSetSizes(*A, m, n, M, N));
4585   PetscCallMPI(MPI_Comm_size(comm, &size));
4586   if (size > 1) {
4587     PetscCall(MatSetType(*A, MATMPIAIJ));
4588     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4589   } else {
4590     PetscCall(MatSetType(*A, MATSEQAIJ));
4591     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4592   }
4593   PetscFunctionReturn(PETSC_SUCCESS);
4594 }
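
/*
   Illustrative sketch (not part of the library proper): creating a parallel AIJ matrix in a
   single call with constant per-row estimates d_nz = 3 and o_nz = 2 (hypothetical numbers,
   e.g. for a narrow-banded operator). On a single-rank communicator this returns a MATSEQAIJ
   matrix, as noted above.
*/
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm, PetscInt mlocal, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreateAIJ(comm, mlocal, mlocal, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 2, NULL, A));
  /* ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
  PetscFunctionReturn(PETSC_SUCCESS);
}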
4595 
4596 /*@C
4597   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4598 
4599   Not Collective
4600 
4601   Input Parameter:
4602 . A - The `MATMPIAIJ` matrix
4603 
4604   Output Parameters:
4605 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4606 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4607 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4608 
4609   Level: intermediate
4610 
4611   Note:
4612   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4613   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4614   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4615   local column numbers to global column numbers in the original matrix.
4616 
4617 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4618 @*/
4619 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4620 {
4621   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4622   PetscBool   flg;
4623 
4624   PetscFunctionBegin;
4625   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4626   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4627   if (Ad) *Ad = a->A;
4628   if (Ao) *Ao = a->B;
4629   if (colmap) *colmap = a->garray;
4630   PetscFunctionReturn(PETSC_SUCCESS);
4631 }
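
/*
   Illustrative sketch (not part of the library proper): retrieving the local diagonal and
   off-diagonal blocks of a MATMPIAIJ matrix and the colmap array that translates the
   compressed off-diagonal column numbering back to global columns, as described above.
*/
static PetscErrorCode ExampleInspectLocalBlocks(Mat A)
{
  Mat             Ad, Ao;
  const PetscInt *colmap;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
  /* a local column c of Ao corresponds to global column colmap[c] of A */
  PetscCall(MatView(Ad, PETSC_VIEWER_STDOUT_SELF));
  PetscCall(MatView(Ao, PETSC_VIEWER_STDOUT_SELF));
  PetscFunctionReturn(PETSC_SUCCESS);
}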
4632 
4633 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4634 {
4635   PetscInt     m, N, i, rstart, nnz, Ii;
4636   PetscInt    *indx;
4637   PetscScalar *values;
4638   MatType      rootType;
4639 
4640   PetscFunctionBegin;
4641   PetscCall(MatGetSize(inmat, &m, &N));
4642   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4643     PetscInt *dnz, *onz, sum, bs, cbs;
4644 
4645     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4646     /* Check sum(n) = N */
4647     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4648     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4649 
4650     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4651     rstart -= m;
4652 
4653     MatPreallocateBegin(comm, m, n, dnz, onz);
4654     for (i = 0; i < m; i++) {
4655       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4656       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4657       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4658     }
4659 
4660     PetscCall(MatCreate(comm, outmat));
4661     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4662     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4663     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4664     PetscCall(MatGetRootType_Private(inmat, &rootType));
4665     PetscCall(MatSetType(*outmat, rootType));
4666     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4667     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4668     MatPreallocateEnd(dnz, onz);
4669     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4670   }
4671 
4672   /* numeric phase */
4673   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4674   for (i = 0; i < m; i++) {
4675     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4676     Ii = i + rstart;
4677     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4678     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4679   }
4680   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4681   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4682   PetscFunctionReturn(PETSC_SUCCESS);
4683 }
4684 
4685 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4686 {
4687   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4688 
4689   PetscFunctionBegin;
4690   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4691   PetscCall(PetscFree(merge->id_r));
4692   PetscCall(PetscFree(merge->len_s));
4693   PetscCall(PetscFree(merge->len_r));
4694   PetscCall(PetscFree(merge->bi));
4695   PetscCall(PetscFree(merge->bj));
4696   PetscCall(PetscFree(merge->buf_ri[0]));
4697   PetscCall(PetscFree(merge->buf_ri));
4698   PetscCall(PetscFree(merge->buf_rj[0]));
4699   PetscCall(PetscFree(merge->buf_rj));
4700   PetscCall(PetscFree(merge->coi));
4701   PetscCall(PetscFree(merge->coj));
4702   PetscCall(PetscFree(merge->owners_co));
4703   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4704   PetscCall(PetscFree(merge));
4705   PetscFunctionReturn(PETSC_SUCCESS);
4706 }
4707 
4708 #include <../src/mat/utils/freespace.h>
4709 #include <petscbt.h>
4710 
4711 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4712 {
4713   MPI_Comm             comm;
4714   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4715   PetscMPIInt          size, rank, taga, *len_s;
4716   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4717   PetscMPIInt          proc, k;
4718   PetscInt           **buf_ri, **buf_rj;
4719   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4720   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4721   MPI_Request         *s_waits, *r_waits;
4722   MPI_Status          *status;
4723   const MatScalar     *aa, *a_a;
4724   MatScalar          **abuf_r, *ba_i;
4725   Mat_Merge_SeqsToMPI *merge;
4726   PetscContainer       container;
4727 
4728   PetscFunctionBegin;
4729   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4730   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4731 
4732   PetscCallMPI(MPI_Comm_size(comm, &size));
4733   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4734 
4735   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4736   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4737   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4738   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4739   aa = a_a;
4740 
4741   bi     = merge->bi;
4742   bj     = merge->bj;
4743   buf_ri = merge->buf_ri;
4744   buf_rj = merge->buf_rj;
4745 
4746   PetscCall(PetscMalloc1(size, &status));
4747   owners = merge->rowmap->range;
4748   len_s  = merge->len_s;
4749 
4750   /* send and recv matrix values */
4751   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4752   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4753 
4754   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4755   for (proc = 0, k = 0; proc < size; proc++) {
4756     if (!len_s[proc]) continue;
4757     i = owners[proc];
4758     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4759     k++;
4760   }
4761 
4762   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4763   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4764   PetscCall(PetscFree(status));
4765 
4766   PetscCall(PetscFree(s_waits));
4767   PetscCall(PetscFree(r_waits));
4768 
4769   /* insert mat values of mpimat */
4770   PetscCall(PetscMalloc1(N, &ba_i));
4771   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4772 
4773   for (k = 0; k < merge->nrecv; k++) {
4774     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4775     nrows       = *buf_ri_k[k];
4776     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4777     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4778   }
4779 
4780   /* set values of ba */
4781   m = merge->rowmap->n;
4782   for (i = 0; i < m; i++) {
4783     arow = owners[rank] + i;
4784     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4785     bnzi = bi[i + 1] - bi[i];
4786     PetscCall(PetscArrayzero(ba_i, bnzi));
4787 
4788     /* add local non-zero vals of this proc's seqmat into ba */
4789     anzi   = ai[arow + 1] - ai[arow];
4790     aj     = a->j + ai[arow];
4791     aa     = a_a + ai[arow];
4792     nextaj = 0;
4793     for (j = 0; nextaj < anzi; j++) {
4794       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4795         ba_i[j] += aa[nextaj++];
4796       }
4797     }
4798 
4799     /* add received vals into ba */
4800     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4801       /* i-th row */
4802       if (i == *nextrow[k]) {
4803         anzi   = *(nextai[k] + 1) - *nextai[k];
4804         aj     = buf_rj[k] + *nextai[k];
4805         aa     = abuf_r[k] + *nextai[k];
4806         nextaj = 0;
4807         for (j = 0; nextaj < anzi; j++) {
4808           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4809             ba_i[j] += aa[nextaj++];
4810           }
4811         }
4812         nextrow[k]++;
4813         nextai[k]++;
4814       }
4815     }
4816     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4817   }
4818   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4819   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4820   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4821 
4822   PetscCall(PetscFree(abuf_r[0]));
4823   PetscCall(PetscFree(abuf_r));
4824   PetscCall(PetscFree(ba_i));
4825   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4826   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4827   PetscFunctionReturn(PETSC_SUCCESS);
4828 }
4829 
4830 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4831 {
4832   Mat                  B_mpi;
4833   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4834   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4835   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4836   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4837   PetscInt             len, *dnz, *onz, bs, cbs;
4838   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4839   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4840   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4841   MPI_Status          *status;
4842   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4843   PetscBT              lnkbt;
4844   Mat_Merge_SeqsToMPI *merge;
4845   PetscContainer       container;
4846 
4847   PetscFunctionBegin;
4848   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4849 
4850   /* make sure it is a PETSc comm */
4851   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4852   PetscCallMPI(MPI_Comm_size(comm, &size));
4853   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4854 
4855   PetscCall(PetscNew(&merge));
4856   PetscCall(PetscMalloc1(size, &status));
4857 
4858   /* determine row ownership */
4859   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4860   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4861   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4862   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4863   PetscCall(PetscLayoutSetUp(merge->rowmap));
4864   PetscCall(PetscMalloc1(size, &len_si));
4865   PetscCall(PetscMalloc1(size, &merge->len_s));
4866 
4867   m      = merge->rowmap->n;
4868   owners = merge->rowmap->range;
4869 
4870   /* determine the number of messages to send, their lengths */
4871   len_s = merge->len_s;
4872 
4873   len          = 0; /* length of buf_si[] */
4874   merge->nsend = 0;
4875   for (PetscMPIInt proc = 0; proc < size; proc++) {
4876     len_si[proc] = 0;
4877     if (proc == rank) {
4878       len_s[proc] = 0;
4879     } else {
4880       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4881       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4882     }
4883     if (len_s[proc]) {
4884       merge->nsend++;
4885       nrows = 0;
4886       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4887         if (ai[i + 1] > ai[i]) nrows++;
4888       }
4889       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4890       len += len_si[proc];
4891     }
4892   }
4893 
4894   /* determine the number and length of messages to receive for ij-structure */
4895   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4896   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4897 
4898   /* post the Irecv of j-structure */
4899   PetscCall(PetscCommGetNewTag(comm, &tagj));
4900   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4901 
4902   /* post the Isend of j-structure */
4903   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4904 
4905   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4906     if (!len_s[proc]) continue;
4907     i = owners[proc];
4908     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4909     k++;
4910   }
4911 
4912   /* receives and sends of j-structure are complete */
4913   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4914   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4915 
4916   /* send and recv i-structure */
4917   PetscCall(PetscCommGetNewTag(comm, &tagi));
4918   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4919 
4920   PetscCall(PetscMalloc1(len + 1, &buf_s));
4921   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4922   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4923     if (!len_s[proc]) continue;
4924     /* form outgoing message for i-structure:
4925          buf_si[0]:                 nrows to be sent
4926                [1:nrows]:           row index (global)
4927                [nrows+1:2*nrows+1]: i-structure index
4928     */
4929     nrows       = len_si[proc] / 2 - 1;
4930     buf_si_i    = buf_si + nrows + 1;
4931     buf_si[0]   = nrows;
4932     buf_si_i[0] = 0;
4933     nrows       = 0;
4934     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4935       anzi = ai[i + 1] - ai[i];
4936       if (anzi) {
4937         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4938         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4939         nrows++;
4940       }
4941     }
4942     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4943     k++;
4944     buf_si += len_si[proc];
4945   }
4946 
4947   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4948   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4949 
4950   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4951   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4952 
4953   PetscCall(PetscFree(len_si));
4954   PetscCall(PetscFree(len_ri));
4955   PetscCall(PetscFree(rj_waits));
4956   PetscCall(PetscFree2(si_waits, sj_waits));
4957   PetscCall(PetscFree(ri_waits));
4958   PetscCall(PetscFree(buf_s));
4959   PetscCall(PetscFree(status));
4960 
4961   /* compute a local seq matrix in each processor */
4962   /* allocate bi array and free space for accumulating nonzero column info */
4963   PetscCall(PetscMalloc1(m + 1, &bi));
4964   bi[0] = 0;
4965 
4966   /* create and initialize a linked list */
4967   nlnk = N + 1;
4968   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4969 
4970   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4971   len = ai[owners[rank + 1]] - ai[owners[rank]];
4972   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4973 
4974   current_space = free_space;
4975 
4976   /* determine symbolic info for each local row */
4977   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4978 
4979   for (k = 0; k < merge->nrecv; k++) {
4980     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4981     nrows       = *buf_ri_k[k];
4982     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4983     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4984   }
4985 
4986   MatPreallocateBegin(comm, m, n, dnz, onz);
4987   len = 0;
4988   for (i = 0; i < m; i++) {
4989     bnzi = 0;
4990     /* add local non-zero cols of this proc's seqmat into lnk */
4991     arow = owners[rank] + i;
4992     anzi = ai[arow + 1] - ai[arow];
4993     aj   = a->j + ai[arow];
4994     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4995     bnzi += nlnk;
4996     /* add received col data into lnk */
4997     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4998       if (i == *nextrow[k]) {            /* i-th row */
4999         anzi = *(nextai[k] + 1) - *nextai[k];
5000         aj   = buf_rj[k] + *nextai[k];
5001         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5002         bnzi += nlnk;
5003         nextrow[k]++;
5004         nextai[k]++;
5005       }
5006     }
5007     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5008 
5009     /* if free space is not available, make more free space */
5010     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5011     /* copy data into free space, then initialize lnk */
5012     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5013     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5014 
5015     current_space->array += bnzi;
5016     current_space->local_used += bnzi;
5017     current_space->local_remaining -= bnzi;
5018 
5019     bi[i + 1] = bi[i] + bnzi;
5020   }
5021 
5022   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5023 
5024   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5025   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5026   PetscCall(PetscLLDestroy(lnk, lnkbt));
5027 
5028   /* create symbolic parallel matrix B_mpi */
5029   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5030   PetscCall(MatCreate(comm, &B_mpi));
5031   if (n == PETSC_DECIDE) {
5032     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5033   } else {
5034     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5035   }
5036   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5037   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5038   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5039   MatPreallocateEnd(dnz, onz);
5040   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5041 
5042   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5043   B_mpi->assembled = PETSC_FALSE;
5044   merge->bi        = bi;
5045   merge->bj        = bj;
5046   merge->buf_ri    = buf_ri;
5047   merge->buf_rj    = buf_rj;
5048   merge->coi       = NULL;
5049   merge->coj       = NULL;
5050   merge->owners_co = NULL;
5051 
5052   PetscCall(PetscCommDestroy(&comm));
5053 
5054   /* attach the supporting struct to B_mpi for reuse */
5055   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5056   PetscCall(PetscContainerSetPointer(container, merge));
5057   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5058   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5059   PetscCall(PetscContainerDestroy(&container));
5060   *mpimat = B_mpi;
5061 
5062   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5063   PetscFunctionReturn(PETSC_SUCCESS);
5064 }
5065 
5066 /*@
5067   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5068   matrices from each processor
5069 
5070   Collective
5071 
5072   Input Parameters:
5073 + comm   - the communicator the parallel matrix will live on
5074 . seqmat - the input sequential matrix (one per MPI process)
5075 . m      - number of local rows (or `PETSC_DECIDE`)
5076 . n      - number of local columns (or `PETSC_DECIDE`)
5077 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5078 
5079   Output Parameter:
5080 . mpimat - the parallel matrix generated
5081 
5082   Level: advanced
5083 
5084   Note:
5085   The dimensions of the sequential matrix on each process MUST be the same.
5086   The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI", and will be
5087   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5088 
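  Example Usage:
  A sketch of a typical call sequence; the assembly of `seqmat` is only indicated, and the global sizes `M`, `N` and the communicator are illustrative.
.vb
  Mat seqmat, mpimat;

  // every rank builds a sequential matrix of the same global size M x N
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, M, N, PETSC_DEFAULT, NULL, &seqmat));
  // ... fill seqmat with MatSetValues() and assemble it ...

  // sum the per-rank contributions into one parallel matrix
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat));
  // later, after only the numerical values of seqmat have changed
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat));
  PetscCall(MatDestroy(&mpimat)); // see the Note above for the lifetime of seqmat
.ve
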
5089 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5090 @*/
5091 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5092 {
5093   PetscMPIInt size;
5094 
5095   PetscFunctionBegin;
5096   PetscCallMPI(MPI_Comm_size(comm, &size));
5097   if (size == 1) {
5098     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5099     if (scall == MAT_INITIAL_MATRIX) {
5100       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5101     } else {
5102       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5103     }
5104     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5105     PetscFunctionReturn(PETSC_SUCCESS);
5106   }
5107   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5108   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5109   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5110   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5111   PetscFunctionReturn(PETSC_SUCCESS);
5112 }
5113 
5114 /*@
5115   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5116 
5117   Not Collective
5118 
5119   Input Parameter:
5120 . A - the matrix
5121 
5122   Output Parameter:
5123 . A_loc - the local sequential matrix generated
5124 
5125   Level: developer
5126 
5127   Notes:
5128   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5129   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5130   `n` is the global column count obtained with `MatGetSize()`.
5131 
5132   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5133 
5134   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5135 
5136   Destroy the matrix with `MatDestroy()`
5137 
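  Example Usage:
  A minimal sketch, assuming `A` is an assembled AIJ matrix (sequential or parallel):
.vb
  Mat A_loc;

  PetscCall(MatAIJGetLocalMat(A, &A_loc));
  // ... operate on the local rows of A through A_loc ...
  PetscCall(MatDestroy(&A_loc));
.ve
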
5138 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5139 @*/
5140 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5141 {
5142   PetscBool mpi;
5143 
5144   PetscFunctionBegin;
5145   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5146   if (mpi) {
5147     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5148   } else {
5149     *A_loc = A;
5150     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5151   }
5152   PetscFunctionReturn(PETSC_SUCCESS);
5153 }
5154 
5155 /*@
5156   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5157 
5158   Not Collective
5159 
5160   Input Parameters:
5161 + A     - the matrix
5162 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5163 
5164   Output Parameter:
5165 . A_loc - the local sequential matrix generated
5166 
5167   Level: developer
5168 
5169   Notes:
5170   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5171   matrix with `mlocal` rows and `n` columns, where `mlocal` is the row count obtained with
5172   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5173 
5174   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5175 
5176   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5177   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5178   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5179   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5180 
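  Example Usage:
  A sketch that builds the local matrix once and later refreshes its values; `A` is assumed to be an assembled `MATMPIAIJ`.
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
  // ... use A_loc ...
  // after the values (but not the nonzero pattern) of A have changed
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve
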
5181 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5182 @*/
5183 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5184 {
5185   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5186   Mat_SeqAIJ        *mat, *a, *b;
5187   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5188   const PetscScalar *aa, *ba, *aav, *bav;
5189   PetscScalar       *ca, *cam;
5190   PetscMPIInt        size;
5191   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5192   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5193   PetscBool          match;
5194 
5195   PetscFunctionBegin;
5196   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5197   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5198   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5199   if (size == 1) {
5200     if (scall == MAT_INITIAL_MATRIX) {
5201       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5202       *A_loc = mpimat->A;
5203     } else if (scall == MAT_REUSE_MATRIX) {
5204       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5205     }
5206     PetscFunctionReturn(PETSC_SUCCESS);
5207   }
5208 
5209   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5210   a  = (Mat_SeqAIJ *)mpimat->A->data;
5211   b  = (Mat_SeqAIJ *)mpimat->B->data;
5212   ai = a->i;
5213   aj = a->j;
5214   bi = b->i;
5215   bj = b->j;
5216   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5217   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5218   aa = aav;
5219   ba = bav;
5220   if (scall == MAT_INITIAL_MATRIX) {
5221     PetscCall(PetscMalloc1(1 + am, &ci));
5222     ci[0] = 0;
5223     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5224     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5225     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5226     k = 0;
5227     for (i = 0; i < am; i++) {
5228       ncols_o = bi[i + 1] - bi[i];
5229       ncols_d = ai[i + 1] - ai[i];
5230       /* off-diagonal portion of A */
5231       for (jo = 0; jo < ncols_o; jo++) {
5232         col = cmap[*bj];
5233         if (col >= cstart) break;
5234         cj[k] = col;
5235         bj++;
5236         ca[k++] = *ba++;
5237       }
5238       /* diagonal portion of A */
5239       for (j = 0; j < ncols_d; j++) {
5240         cj[k]   = cstart + *aj++;
5241         ca[k++] = *aa++;
5242       }
5243       /* off-diagonal portion of A */
5244       for (j = jo; j < ncols_o; j++) {
5245         cj[k]   = cmap[*bj++];
5246         ca[k++] = *ba++;
5247       }
5248     }
5249     /* put together the new matrix */
5250     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5251     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5252     /* Since these are PETSc arrays, change flags to free them as necessary. */
5253     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5254     mat->free_a  = PETSC_TRUE;
5255     mat->free_ij = PETSC_TRUE;
5256     mat->nonew   = 0;
5257   } else if (scall == MAT_REUSE_MATRIX) {
5258     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5259     ci  = mat->i;
5260     cj  = mat->j;
5261     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5262     for (i = 0; i < am; i++) {
5263       /* off-diagonal portion of A */
5264       ncols_o = bi[i + 1] - bi[i];
5265       for (jo = 0; jo < ncols_o; jo++) {
5266         col = cmap[*bj];
5267         if (col >= cstart) break;
5268         *cam++ = *ba++;
5269         bj++;
5270       }
5271       /* diagonal portion of A */
5272       ncols_d = ai[i + 1] - ai[i];
5273       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5274       /* off-diagonal portion of A */
5275       for (j = jo; j < ncols_o; j++) {
5276         *cam++ = *ba++;
5277         bj++;
5278       }
5279     }
5280     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5281   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5282   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5283   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5284   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5285   PetscFunctionReturn(PETSC_SUCCESS);
5286 }
5287 
5288 /*@
5289   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5290   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5291 
5292   Not Collective
5293 
5294   Input Parameters:
5295 + A     - the matrix
5296 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5297 
5298   Output Parameters:
5299 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5300 - A_loc - the local sequential matrix generated
5301 
5302   Level: developer
5303 
5304   Note:
5305   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5306   part, followed by those associated with the off-diagonal part (in its local ordering).
5307 
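  Example Usage:
  A sketch that also retrieves the global column indices of the merged local matrix; `A` is assumed to be an assembled `MATMPIAIJ`.
.vb
  Mat A_loc;
  IS  glob;

  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
  // local column j of A_loc corresponds to the global column given by the j-th entry of glob
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&A_loc));
.ve
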
5308 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5309 @*/
5310 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5311 {
5312   Mat             Ao, Ad;
5313   const PetscInt *cmap;
5314   PetscMPIInt     size;
5315   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5316 
5317   PetscFunctionBegin;
5318   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5319   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5320   if (size == 1) {
5321     if (scall == MAT_INITIAL_MATRIX) {
5322       PetscCall(PetscObjectReference((PetscObject)Ad));
5323       *A_loc = Ad;
5324     } else if (scall == MAT_REUSE_MATRIX) {
5325       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5326     }
5327     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5328     PetscFunctionReturn(PETSC_SUCCESS);
5329   }
5330   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5331   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5332   if (f) {
5333     PetscCall((*f)(A, scall, glob, A_loc));
5334   } else {
5335     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5336     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5337     Mat_SeqAIJ        *c;
5338     PetscInt          *ai = a->i, *aj = a->j;
5339     PetscInt          *bi = b->i, *bj = b->j;
5340     PetscInt          *ci, *cj;
5341     const PetscScalar *aa, *ba;
5342     PetscScalar       *ca;
5343     PetscInt           i, j, am, dn, on;
5344 
5345     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5346     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5347     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5348     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5349     if (scall == MAT_INITIAL_MATRIX) {
5350       PetscInt k;
5351       PetscCall(PetscMalloc1(1 + am, &ci));
5352       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5353       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5354       ci[0] = 0;
5355       for (i = 0, k = 0; i < am; i++) {
5356         const PetscInt ncols_o = bi[i + 1] - bi[i];
5357         const PetscInt ncols_d = ai[i + 1] - ai[i];
5358         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5359         /* diagonal portion of A */
5360         for (j = 0; j < ncols_d; j++, k++) {
5361           cj[k] = *aj++;
5362           ca[k] = *aa++;
5363         }
5364         /* off-diagonal portion of A */
5365         for (j = 0; j < ncols_o; j++, k++) {
5366           cj[k] = dn + *bj++;
5367           ca[k] = *ba++;
5368         }
5369       }
5370       /* put together the new matrix */
5371       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5372       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5373       /* Since these are PETSc arrays, change flags to free them as necessary. */
5374       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5375       c->free_a  = PETSC_TRUE;
5376       c->free_ij = PETSC_TRUE;
5377       c->nonew   = 0;
5378       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5379     } else if (scall == MAT_REUSE_MATRIX) {
5380       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5381       for (i = 0; i < am; i++) {
5382         const PetscInt ncols_d = ai[i + 1] - ai[i];
5383         const PetscInt ncols_o = bi[i + 1] - bi[i];
5384         /* diagonal portion of A */
5385         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5386         /* off-diagonal portion of A */
5387         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5388       }
5389       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5390     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5391     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5392     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5393     if (glob) {
5394       PetscInt cst, *gidx;
5395 
5396       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5397       PetscCall(PetscMalloc1(dn + on, &gidx));
5398       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5399       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5400       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5401     }
5402   }
5403   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5404   PetscFunctionReturn(PETSC_SUCCESS);
5405 }
5406 
5407 /*@C
5408   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5409 
5410   Not Collective
5411 
5412   Input Parameters:
5413 + A     - the matrix
5414 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5415 . row   - index set of rows to extract (or `NULL`)
5416 - col   - index set of columns to extract (or `NULL`)
5417 
5418   Output Parameter:
5419 . A_loc - the local sequential matrix generated
5420 
5421   Level: developer
5422 
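  Example Usage:
  A sketch that lets the routine choose the rows and columns by passing `NULL` index sets; `A` is assumed to be an assembled `MATMPIAIJ`.
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
  // A_loc now contains the local rows of A restricted to its nonzero columns
  PetscCall(MatDestroy(&A_loc));
.ve
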
5423 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5424 @*/
5425 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5426 {
5427   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5428   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5429   IS          isrowa, iscola;
5430   Mat        *aloc;
5431   PetscBool   match;
5432 
5433   PetscFunctionBegin;
5434   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5435   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5436   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5437   if (!row) {
5438     start = A->rmap->rstart;
5439     end   = A->rmap->rend;
5440     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5441   } else {
5442     isrowa = *row;
5443   }
5444   if (!col) {
5445     start = A->cmap->rstart;
5446     cmap  = a->garray;
5447     nzA   = a->A->cmap->n;
5448     nzB   = a->B->cmap->n;
5449     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5450     ncols = 0;
5451     for (i = 0; i < nzB; i++) {
5452       if (cmap[i] < start) idx[ncols++] = cmap[i];
5453       else break;
5454     }
5455     imark = i;
5456     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5457     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5458     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5459   } else {
5460     iscola = *col;
5461   }
5462   if (scall != MAT_INITIAL_MATRIX) {
5463     PetscCall(PetscMalloc1(1, &aloc));
5464     aloc[0] = *A_loc;
5465   }
5466   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5467   if (!col) { /* attach global id of condensed columns */
5468     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5469   }
5470   *A_loc = aloc[0];
5471   PetscCall(PetscFree(aloc));
5472   if (!row) PetscCall(ISDestroy(&isrowa));
5473   if (!col) PetscCall(ISDestroy(&iscola));
5474   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5475   PetscFunctionReturn(PETSC_SUCCESS);
5476 }
5477 
5478 /*
5479  * Create a sequential AIJ matrix based on row indices: once a row is matched, the whole row is extracted.
5480  * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5481  * on a global size.
5482  * */
5483 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5484 {
5485   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5486   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5487   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5488   PetscMPIInt            owner;
5489   PetscSFNode           *iremote, *oiremote;
5490   const PetscInt        *lrowindices;
5491   PetscSF                sf, osf;
5492   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5493   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5494   MPI_Comm               comm;
5495   ISLocalToGlobalMapping mapping;
5496   const PetscScalar     *pd_a, *po_a;
5497 
5498   PetscFunctionBegin;
5499   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5500   /* plocalsize is the number of roots
5501    * nrows is the number of leaves
5502    * */
5503   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5504   PetscCall(ISGetLocalSize(rows, &nrows));
5505   PetscCall(PetscCalloc1(nrows, &iremote));
5506   PetscCall(ISGetIndices(rows, &lrowindices));
5507   for (i = 0; i < nrows; i++) {
5508     /* Find a remote index and an owner for a row
5509      * The row could be local or remote
5510      * */
5511     owner = 0;
5512     lidx  = 0;
5513     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5514     iremote[i].index = lidx;
5515     iremote[i].rank  = owner;
5516   }
5517   /* Create SF to communicate how many nonzero columns for each row */
5518   PetscCall(PetscSFCreate(comm, &sf));
5519   /* SF will figure out the number of nonzero columns for each row, and their
5520    * offsets
5521    * */
5522   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5523   PetscCall(PetscSFSetFromOptions(sf));
5524   PetscCall(PetscSFSetUp(sf));
5525 
5526   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5527   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5528   PetscCall(PetscCalloc1(nrows, &pnnz));
5529   roffsets[0] = 0;
5530   roffsets[1] = 0;
5531   for (i = 0; i < plocalsize; i++) {
5532     /* diagonal */
5533     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5534     /* off-diagonal */
5535     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5536     /* compute offsets so that we know the relative location of each row */
5537     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5538     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5539   }
5540   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5541   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5542   /* 'r' means root, and 'l' means leaf */
5543   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5544   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5545   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5546   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5547   PetscCall(PetscSFDestroy(&sf));
5548   PetscCall(PetscFree(roffsets));
5549   PetscCall(PetscFree(nrcols));
5550   dntotalcols = 0;
5551   ontotalcols = 0;
5552   ncol        = 0;
5553   for (i = 0; i < nrows; i++) {
5554     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5555     ncol    = PetscMax(pnnz[i], ncol);
5556     /* diagonal */
5557     dntotalcols += nlcols[i * 2 + 0];
5558     /* off-diagonal */
5559     ontotalcols += nlcols[i * 2 + 1];
5560   }
5561   /* We do not need to figure out the exact number of columns
5562    * since all the calculations will be done by going through the raw data
5563    * */
5564   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5565   PetscCall(MatSetUp(*P_oth));
5566   PetscCall(PetscFree(pnnz));
5567   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5568   /* diagonal */
5569   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5570   /* off-diagonal */
5571   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5572   /* diagonal */
5573   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5574   /* off-diagonal */
5575   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5576   dntotalcols = 0;
5577   ontotalcols = 0;
5578   ntotalcols  = 0;
5579   for (i = 0; i < nrows; i++) {
5580     owner = 0;
5581     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5582     /* Set iremote for diag matrix */
5583     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5584       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5585       iremote[dntotalcols].rank  = owner;
5586       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5587       ilocal[dntotalcols++] = ntotalcols++;
5588     }
5589     /* off-diagonal */
5590     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5591       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5592       oiremote[ontotalcols].rank  = owner;
5593       oilocal[ontotalcols++]      = ntotalcols++;
5594     }
5595   }
5596   PetscCall(ISRestoreIndices(rows, &lrowindices));
5597   PetscCall(PetscFree(loffsets));
5598   PetscCall(PetscFree(nlcols));
5599   PetscCall(PetscSFCreate(comm, &sf));
5600   /* P serves as roots and P_oth serves as leaves
5601    * Diag matrix
5602    * */
5603   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5604   PetscCall(PetscSFSetFromOptions(sf));
5605   PetscCall(PetscSFSetUp(sf));
5606 
5607   PetscCall(PetscSFCreate(comm, &osf));
5608   /* off-diagonal */
5609   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5610   PetscCall(PetscSFSetFromOptions(osf));
5611   PetscCall(PetscSFSetUp(osf));
5612   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5613   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5614   /* operate on the matrix internal data to save memory */
5615   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5616   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5617   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5618   /* Convert to global indices for diag matrix */
5619   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5620   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5621   /* We want P_oth to store global indices */
5622   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5623   /* Use memory scalable approach */
5624   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5625   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5626   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5627   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5628   /* Convert back to local indices */
5629   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5630   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5631   nout = 0;
5632   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5633   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5634   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5635   /* Exchange values */
5636   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5637   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5638   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5639   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5640   /* Stop PETSc from shrinking memory */
5641   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5642   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5643   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5644   /* Attach PetscSF objects to P_oth so that we can reuse them later */
5645   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5646   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5647   PetscCall(PetscSFDestroy(&sf));
5648   PetscCall(PetscSFDestroy(&osf));
5649   PetscFunctionReturn(PETSC_SUCCESS);
5650 }
5651 
5652 /*
5653  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5654  * This supports MPIAIJ and MAIJ
5655  * */
5656 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5657 {
5658   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5659   Mat_SeqAIJ *p_oth;
5660   IS          rows, map;
5661   PetscHMapI  hamp;
5662   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5663   MPI_Comm    comm;
5664   PetscSF     sf, osf;
5665   PetscBool   has;
5666 
5667   PetscFunctionBegin;
5668   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5669   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5670   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5671    *  and then create a submatrix (that often is an overlapping matrix)
5672    * */
5673   if (reuse == MAT_INITIAL_MATRIX) {
5674     /* Use a hash table to figure out unique keys */
5675     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5676     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5677     count = 0;
5678     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5679     for (i = 0; i < a->B->cmap->n; i++) {
5680       key = a->garray[i] / dof;
5681       PetscCall(PetscHMapIHas(hamp, key, &has));
5682       if (!has) {
5683         mapping[i] = count;
5684         PetscCall(PetscHMapISet(hamp, key, count++));
5685       } else {
5686         /* The current 'i' has the same key as the previous one */
5687         mapping[i] = count - 1;
5688       }
5689     }
5690     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5691     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5692     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5693     PetscCall(PetscCalloc1(htsize, &rowindices));
5694     off = 0;
5695     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5696     PetscCall(PetscHMapIDestroy(&hamp));
5697     PetscCall(PetscSortInt(htsize, rowindices));
5698     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5699     /* In case the matrix was already created but the user wants to recreate it */
5700     PetscCall(MatDestroy(P_oth));
5701     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5702     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5703     PetscCall(ISDestroy(&map));
5704     PetscCall(ISDestroy(&rows));
5705   } else if (reuse == MAT_REUSE_MATRIX) {
5706     /* If the matrix was already created, we simply update the values using the SF objects
5707      * that were attached to the matrix earlier.
5708      */
5709     const PetscScalar *pd_a, *po_a;
5710 
5711     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5712     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5713     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5714     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5715     /* Update values in place */
5716     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5717     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5718     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5719     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5720     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5721     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5722     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5723     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5724   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5725   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5726   PetscFunctionReturn(PETSC_SUCCESS);
5727 }
5728 
5729 /*@C
5730   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A`
5731 
5732   Collective
5733 
5734   Input Parameters:
5735 + A     - the first matrix in `MATMPIAIJ` format
5736 . B     - the second matrix in `MATMPIAIJ` format
5737 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5738 
5739   Output Parameters:
5740 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5741 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5742 - B_seq - the sequential matrix generated
5743 
5744   Level: developer
5745 
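  Example Usage:
  A sketch of an initial call followed by a reuse that refreshes the values; the index sets produced by the first call are passed back in unchanged.
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq = NULL;

  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
  // ... later, when the values of B have changed but its nonzero structure has not ...
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve
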
5746 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5747 @*/
5748 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5749 {
5750   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5751   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5752   IS          isrowb, iscolb;
5753   Mat        *bseq = NULL;
5754 
5755   PetscFunctionBegin;
5756   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5757              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5758   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5759 
5760   if (scall == MAT_INITIAL_MATRIX) {
5761     start = A->cmap->rstart;
5762     cmap  = a->garray;
5763     nzA   = a->A->cmap->n;
5764     nzB   = a->B->cmap->n;
5765     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5766     ncols = 0;
5767     for (i = 0; i < nzB; i++) { /* row < local row index */
5768       if (cmap[i] < start) idx[ncols++] = cmap[i];
5769       else break;
5770     }
5771     imark = i;
5772     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5773     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5774     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5775     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5776   } else {
5777     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5778     isrowb = *rowb;
5779     iscolb = *colb;
5780     PetscCall(PetscMalloc1(1, &bseq));
5781     bseq[0] = *B_seq;
5782   }
5783   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5784   *B_seq = bseq[0];
5785   PetscCall(PetscFree(bseq));
5786   if (!rowb) {
5787     PetscCall(ISDestroy(&isrowb));
5788   } else {
5789     *rowb = isrowb;
5790   }
5791   if (!colb) {
5792     PetscCall(ISDestroy(&iscolb));
5793   } else {
5794     *colb = iscolb;
5795   }
5796   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5797   PetscFunctionReturn(PETSC_SUCCESS);
5798 }
5799 
5800 /*
5801     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5802     of the OFF-DIAGONAL portion of local A
5803 
5804     Collective
5805 
5806    Input Parameters:
5807 +    A,B - the matrices in `MATMPIAIJ` format
5808 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5809 
5810    Output Parameters:
5811 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5812 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5813 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5814 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5815 
5816     Developer Note:
5817     This directly accesses information inside the VecScatter associated with the matrix-vector product
5818      for this matrix. This is not desirable.
5819 
5820     Level: developer
5821 
5822 */
5823 
5824 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5825 {
5826   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5827   VecScatter         ctx;
5828   MPI_Comm           comm;
5829   const PetscMPIInt *rprocs, *sprocs;
5830   PetscMPIInt        nrecvs, nsends;
5831   const PetscInt    *srow, *rstarts, *sstarts;
5832   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5833   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5834   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5835   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5836   PetscMPIInt        size, tag, rank, nreqs;
5837 
5838   PetscFunctionBegin;
5839   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5840   PetscCallMPI(MPI_Comm_size(comm, &size));
5841 
5842   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5843              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5844   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5845   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5846 
5847   if (size == 1) {
5848     startsj_s = NULL;
5849     bufa_ptr  = NULL;
5850     *B_oth    = NULL;
5851     PetscFunctionReturn(PETSC_SUCCESS);
5852   }
5853 
5854   ctx = a->Mvctx;
5855   tag = ((PetscObject)ctx)->tag;
5856 
5857   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5858   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5859   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5860   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5861   PetscCall(PetscMalloc1(nreqs, &reqs));
5862   rwaits = reqs;
5863   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5864 
5865   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5866   if (scall == MAT_INITIAL_MATRIX) {
5867     /* i-array */
5868     /*  post receives */
5869     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5870     for (i = 0; i < nrecvs; i++) {
5871       rowlen = rvalues + rstarts[i] * rbs;
5872       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5873       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5874     }
5875 
5876     /* pack the outgoing message */
5877     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5878 
5879     sstartsj[0] = 0;
5880     rstartsj[0] = 0;
5881     len         = 0; /* total length of j or a array to be sent */
5882     if (nsends) {
5883       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5884       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5885     }
5886     for (i = 0; i < nsends; i++) {
5887       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5888       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5889       for (j = 0; j < nrows; j++) {
5890         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5891         for (l = 0; l < sbs; l++) {
5892           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5893 
5894           rowlen[j * sbs + l] = ncols;
5895 
5896           len += ncols;
5897           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5898         }
5899         k++;
5900       }
5901       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5902 
5903       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5904     }
5905     /* recvs and sends of i-array are completed */
5906     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5907     PetscCall(PetscFree(svalues));
5908 
5909     /* allocate buffers for sending j and a arrays */
5910     PetscCall(PetscMalloc1(len + 1, &bufj));
5911     PetscCall(PetscMalloc1(len + 1, &bufa));
5912 
5913     /* create i-array of B_oth */
5914     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5915 
5916     b_othi[0] = 0;
5917     len       = 0; /* total length of j or a array to be received */
5918     k         = 0;
5919     for (i = 0; i < nrecvs; i++) {
5920       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5921       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5922       for (j = 0; j < nrows; j++) {
5923         b_othi[k + 1] = b_othi[k] + rowlen[j];
5924         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5925         k++;
5926       }
5927       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5928     }
5929     PetscCall(PetscFree(rvalues));
5930 
5931     /* allocate space for j and a arrays of B_oth */
5932     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5933     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5934 
5935     /* j-array */
5936     /*  post receives of j-array */
5937     for (i = 0; i < nrecvs; i++) {
5938       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5939       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5940     }
5941 
5942     /* pack the outgoing message j-array */
5943     if (nsends) k = sstarts[0];
5944     for (i = 0; i < nsends; i++) {
5945       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5946       bufJ  = bufj + sstartsj[i];
5947       for (j = 0; j < nrows; j++) {
5948         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5949         for (ll = 0; ll < sbs; ll++) {
5950           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5951           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5952           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5953         }
5954       }
5955       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5956     }
5957 
5958     /* recvs and sends of j-array are completed */
5959     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5960   } else if (scall == MAT_REUSE_MATRIX) {
5961     sstartsj = *startsj_s;
5962     rstartsj = *startsj_r;
5963     bufa     = *bufa_ptr;
5964     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5965   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5966 
5967   /* a-array */
5968   /*  post receives of a-array */
5969   for (i = 0; i < nrecvs; i++) {
5970     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5971     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5972   }
5973 
5974   /* pack the outgoing message a-array */
5975   if (nsends) k = sstarts[0];
5976   for (i = 0; i < nsends; i++) {
5977     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5978     bufA  = bufa + sstartsj[i];
5979     for (j = 0; j < nrows; j++) {
5980       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5981       for (ll = 0; ll < sbs; ll++) {
5982         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5983         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5984         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5985       }
5986     }
5987     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5988   }
5989   /* recvs and sends of a-array are completed */
5990   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5991   PetscCall(PetscFree(reqs));
5992 
5993   if (scall == MAT_INITIAL_MATRIX) {
5994     Mat_SeqAIJ *b_oth;
5995 
5996     /* put together the new matrix */
5997     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5998 
5999     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6000     /* Since these are PETSc arrays, change flags to free them as necessary. */
6001     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6002     b_oth->free_a  = PETSC_TRUE;
6003     b_oth->free_ij = PETSC_TRUE;
6004     b_oth->nonew   = 0;
6005 
6006     PetscCall(PetscFree(bufj));
6007     if (!startsj_s || !bufa_ptr) {
6008       PetscCall(PetscFree2(sstartsj, rstartsj));
6009       PetscCall(PetscFree(bufa_ptr));
6010     } else {
6011       *startsj_s = sstartsj;
6012       *startsj_r = rstartsj;
6013       *bufa_ptr  = bufa;
6014     }
6015   } else if (scall == MAT_REUSE_MATRIX) {
6016     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6017   }
6018 
6019   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6020   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6021   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6022   PetscFunctionReturn(PETSC_SUCCESS);
6023 }
6024 
6025 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6028 #if defined(PETSC_HAVE_MKL_SPARSE)
6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6030 #endif
6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6033 #if defined(PETSC_HAVE_ELEMENTAL)
6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6035 #endif
6036 #if defined(PETSC_HAVE_SCALAPACK)
6037 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6038 #endif
6039 #if defined(PETSC_HAVE_HYPRE)
6040 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6041 #endif
6042 #if defined(PETSC_HAVE_CUDA)
6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6044 #endif
6045 #if defined(PETSC_HAVE_HIP)
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6047 #endif
6048 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6050 #endif
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6052 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6053 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6054 
6055 /*
6056     Computes (B'*A')' since computing B*A directly is untenable
6057 
6058                n                       p                          p
6059         [             ]       [             ]         [                 ]
6060       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6061         [             ]       [             ]         [                 ]
6062 
6063 */
6064 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6065 {
6066   Mat At, Bt, Ct;
6067 
6068   PetscFunctionBegin;
6069   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6070   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6071   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6072   PetscCall(MatDestroy(&At));
6073   PetscCall(MatDestroy(&Bt));
6074   PetscCall(MatTransposeSetPrecursor(Ct, C));
6075   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6076   PetscCall(MatDestroy(&Ct));
6077   PetscFunctionReturn(PETSC_SUCCESS);
6078 }
6079 
6080 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6081 {
6082   PetscBool cisdense;
6083 
6084   PetscFunctionBegin;
6085   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6086   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6087   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6088   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6089   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6090   PetscCall(MatSetUp(C));
6091 
6092   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6093   PetscFunctionReturn(PETSC_SUCCESS);
6094 }
6095 
6096 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6097 {
6098   Mat_Product *product = C->product;
6099   Mat          A = product->A, B = product->B;
6100 
6101   PetscFunctionBegin;
6102   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6103              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6104   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6105   C->ops->productsymbolic = MatProductSymbolic_AB;
6106   PetscFunctionReturn(PETSC_SUCCESS);
6107 }
6108 
6109 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6110 {
6111   Mat_Product *product = C->product;
6112 
6113   PetscFunctionBegin;
6114   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6115   PetscFunctionReturn(PETSC_SUCCESS);
6116 }
6117 
6118 /*
6119    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6120 
6121   Input Parameters:
6122 
6123     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6124     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6125 
6126     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6127 
6128     For Set1, j1[] contains column indices of the nonzeros.
6129     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6130     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6131     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6132 
6133     Similar for Set2.
6134 
6135     This routine merges the two sets of nonzeros row by row and removes repeats.
6136 
6137   Output Parameters: (memory is allocated by the caller)
6138 
6139     i[],j[]: the CSR of the merged matrix, which has m rows.
6140     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6141     imap2[]: similar to imap1[], but for Set2.
6142     Note we order nonzeros row-by-row and from left to right.
6143 */
6144 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6145 {
6146   PetscInt   r, m; /* Row index of mat */
6147   PetscCount t, t1, t2, b1, e1, b2, e2;
6148 
6149   PetscFunctionBegin;
6150   PetscCall(MatGetLocalSize(mat, &m, NULL));
6151   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
6152   i[0]        = 0;
6153   for (r = 0; r < m; r++) { /* Do row by row merging */
6154     b1 = rowBegin1[r];
6155     e1 = rowEnd1[r];
6156     b2 = rowBegin2[r];
6157     e2 = rowEnd2[r];
6158     while (b1 < e1 && b2 < e2) {
6159       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6160         j[t]      = j1[b1];
6161         imap1[t1] = t;
6162         imap2[t2] = t;
6163         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6164         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6165         t1++;
6166         t2++;
6167         t++;
6168       } else if (j1[b1] < j2[b2]) {
6169         j[t]      = j1[b1];
6170         imap1[t1] = t;
6171         b1 += jmap1[t1 + 1] - jmap1[t1];
6172         t1++;
6173         t++;
6174       } else {
6175         j[t]      = j2[b2];
6176         imap2[t2] = t;
6177         b2 += jmap2[t2 + 1] - jmap2[t2];
6178         t2++;
6179         t++;
6180       }
6181     }
6182     /* Merge the remaining in either j1[] or j2[] */
6183     while (b1 < e1) {
6184       j[t]      = j1[b1];
6185       imap1[t1] = t;
6186       b1 += jmap1[t1 + 1] - jmap1[t1];
6187       t1++;
6188       t++;
6189     }
6190     while (b2 < e2) {
6191       j[t]      = j2[b2];
6192       imap2[t2] = t;
6193       b2 += jmap2[t2 + 1] - jmap2[t2];
6194       t2++;
6195       t++;
6196     }
6197     PetscCall(PetscIntCast(t, i + r + 1));
6198   }
6199   PetscFunctionReturn(PETSC_SUCCESS);
6200 }
6201 
6202 /*
6203   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6204 
6205   Input Parameters:
6206     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6207     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6208       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6209 
6210       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6211       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6212 
6213   Output Parameters:
6214     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6215     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6216       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6217       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6218 
6219     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6220       Atot: number of entries belonging to the diagonal block.
6221       Annz: number of unique nonzeros belonging to the diagonal block.
6222       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6223         repeats (i.e., same 'i,j' pair).
6224       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6225         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6229 
6230     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6231 
6232     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6233 */
6234 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6235 {
6236   PetscInt    cstart, cend, rstart, rend, row, col;
6237   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6238   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6239   PetscCount  k, m, p, q, r, s, mid;
6240   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6241 
6242   PetscFunctionBegin;
6243   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6244   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6245   m = rend - rstart;
6246 
6247   /* Skip negative rows */
6248   for (k = 0; k < n; k++)
6249     if (i[k] >= 0) break;
6250 
6251   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6252      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6253   */
6254   while (k < n) {
6255     row = i[k];
6256     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6257     for (s = k; s < n; s++)
6258       if (i[s] != row) break;
6259 
6260     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6261     for (p = k; p < s; p++) {
6262       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6263       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6264     }
6265     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6266     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6267     rowBegin[row - rstart] = k;
6268     rowMid[row - rstart]   = mid;
6269     rowEnd[row - rstart]   = s;
6270 
6271     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6272     Atot += mid - k;
6273     Btot += s - mid;
6274 
6275     /* Count unique nonzeros of this diag row */
6276     for (p = k; p < mid;) {
6277       col = j[p];
6278       do {
6279         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6280         p++;
6281       } while (p < mid && j[p] == col);
6282       Annz++;
6283     }
6284 
6285     /* Count unique nonzeros of this offdiag row */
6286     for (p = mid; p < s;) {
6287       col = j[p];
6288       do {
6289         p++;
6290       } while (p < s && j[p] == col);
6291       Bnnz++;
6292     }
6293     k = s;
6294   }
6295 
6296   /* Allocation according to Atot, Btot, Annz, Bnnz */
6297   PetscCall(PetscMalloc1(Atot, &Aperm));
6298   PetscCall(PetscMalloc1(Btot, &Bperm));
6299   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6300   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6301 
6302   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6303   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6304   for (r = 0; r < m; r++) {
6305     k   = rowBegin[r];
6306     mid = rowMid[r];
6307     s   = rowEnd[r];
6308     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6309     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6310     Atot += mid - k;
6311     Btot += s - mid;
6312 
6313     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6314     for (p = k; p < mid;) {
6315       col = j[p];
6316       q   = p;
6317       do {
6318         p++;
6319       } while (p < mid && j[p] == col);
6320       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6321       Annz++;
6322     }
6323 
6324     for (p = mid; p < s;) {
6325       col = j[p];
6326       q   = p;
6327       do {
6328         p++;
6329       } while (p < s && j[p] == col);
6330       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6331       Bnnz++;
6332     }
6333   }
6334   /* Output */
6335   *Aperm_ = Aperm;
6336   *Annz_  = Annz;
6337   *Atot_  = Atot;
6338   *Ajmap_ = Ajmap;
6339   *Bperm_ = Bperm;
6340   *Bnnz_  = Bnnz;
6341   *Btot_  = Btot;
6342   *Bjmap_ = Bjmap;
6343   PetscFunctionReturn(PETSC_SUCCESS);
6344 }
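/*
  A small worked example of the splitting above (hypothetical input, not taken from the surrounding code):
  suppose this rank owns rows [0,2) and diagonal columns [0,2), and row 0 receives the entries
  (0,0), (0,5), (0,0), (0,3) with perm[] = {0,1,2,3}. After sorting the row, the two (0,0) entries form the
  diagonal part and columns 3 and 5 the off-diagonal part, so that

    Atot = 2, Annz = 1, Ajmap = {0,2}, Aperm = {0,2}   (the order among equal columns depends on the sort)
    Btot = 2, Bnnz = 2, Bjmap = {0,1,2}, Bperm = {3,1}

  i.e., the single unique diagonal nonzero (0,0) has Ajmap[1] - Ajmap[0] = 2 contributing input entries.
*/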
6345 
6346 /*
6347   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6348 
6349   Input Parameters:
6350     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6351     nnz:  number of unique nonzeros in the merged matrix
6352     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6353     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6354 
6355   Output Parameter: (memory is allocated by the caller)
6356     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6357 
6358   Example:
6359     nnz1 = 4
6360     nnz  = 6
6361     imap = [1,3,4,5]
6362     jmap = [0,3,5,6,7]
6363    then,
6364     jmap_new = [0,0,3,3,5,6,7]
6365 */
6366 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6367 {
6368   PetscCount k, p;
6369 
6370   PetscFunctionBegin;
6371   jmap_new[0] = 0;
6372   p           = nnz;                /* p loops over jmap_new[] backwards */
6373   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6374     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6375   }
6376   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6377   PetscFunctionReturn(PETSC_SUCCESS);
6378 }
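/*
  A minimal standalone sketch (plain C, illustrative only) that reproduces the example in the comment above:

    #include <stdio.h>
    int main(void)
    {
      long long imap[] = {1, 3, 4, 5}, jmap[] = {0, 3, 5, 6, 7}, jmap_new[7];
      long long nnz1 = 4, nnz = 6, k, p = nnz;
      jmap_new[0] = 0;
      for (k = nnz1 - 1; k >= 0; k--)
        for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
      for (; p >= 0; p--) jmap_new[p] = jmap[0];
      for (k = 0; k <= nnz; k++) printf("%lld ", jmap_new[k]); // prints: 0 0 3 3 5 6 7
      return 0;
    }
*/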
6379 
6380 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6381 {
6382   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6383 
6384   PetscFunctionBegin;
6385   PetscCall(PetscSFDestroy(&coo->sf));
6386   PetscCall(PetscFree(coo->Aperm1));
6387   PetscCall(PetscFree(coo->Bperm1));
6388   PetscCall(PetscFree(coo->Ajmap1));
6389   PetscCall(PetscFree(coo->Bjmap1));
6390   PetscCall(PetscFree(coo->Aimap2));
6391   PetscCall(PetscFree(coo->Bimap2));
6392   PetscCall(PetscFree(coo->Aperm2));
6393   PetscCall(PetscFree(coo->Bperm2));
6394   PetscCall(PetscFree(coo->Ajmap2));
6395   PetscCall(PetscFree(coo->Bjmap2));
6396   PetscCall(PetscFree(coo->Cperm1));
6397   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6398   PetscCall(PetscFree(coo));
6399   PetscFunctionReturn(PETSC_SUCCESS);
6400 }
6401 
6402 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6403 {
6404   MPI_Comm             comm;
6405   PetscMPIInt          rank, size;
6406   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6407   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6408   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6409   PetscContainer       container;
6410   MatCOOStruct_MPIAIJ *coo;
6411 
6412   PetscFunctionBegin;
6413   PetscCall(PetscFree(mpiaij->garray));
6414   PetscCall(VecDestroy(&mpiaij->lvec));
6415 #if defined(PETSC_USE_CTABLE)
6416   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6417 #else
6418   PetscCall(PetscFree(mpiaij->colmap));
6419 #endif
6420   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6421   mat->assembled     = PETSC_FALSE;
6422   mat->was_assembled = PETSC_FALSE;
6423 
6424   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6425   PetscCallMPI(MPI_Comm_size(comm, &size));
6426   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6427   PetscCall(PetscLayoutSetUp(mat->rmap));
6428   PetscCall(PetscLayoutSetUp(mat->cmap));
6429   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6430   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6431   PetscCall(MatGetLocalSize(mat, &m, &n));
6432   PetscCall(MatGetSize(mat, &M, &N));
6433 
6434   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6435   /* entries come first, then local rows, then remote rows.                     */
6436   PetscCount n1 = coo_n, *perm1;
6437   PetscInt  *i1 = coo_i, *j1 = coo_j;
6438 
6439   PetscCall(PetscMalloc1(n1, &perm1));
6440   for (k = 0; k < n1; k++) perm1[k] = k;
6441 
6442   /* Manipulate indices so that entries with negative row or col indices will have smallest
6443      row indices, local entries will have greater but negative row indices, and remote entries
6444      will have positive row indices.
6445   */
6446   for (k = 0; k < n1; k++) {
6447     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6448     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6449     else {
6450       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6451       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6452     }
6453   }
6454 
6455   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6456   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6457 
6458   /* Advance k to the first entry we need to take care of */
6459   for (k = 0; k < n1; k++)
6460     if (i1[k] > PETSC_INT_MIN) break;
6461   PetscCount i1start = k;
6462 
6463   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6464   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
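/*
  Illustration of the row-index encoding above (hypothetical values): with rstart = 10 and rend = 20, an
  entry flagged for ignoring gets row PETSC_INT_MIN, a local row 12 becomes 12 - PETSC_INT_MAX (negative
  but larger than PETSC_INT_MIN), and a remote row 42 keeps the value 42. A single sort by row therefore
  orders the entries as: ignored, local, remote; the local row indices are then recovered by adding
  PETSC_INT_MAX back, which is what the loop right above does.
*/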
6465 
6466   /*           Send remote rows to their owner                                  */
6467   /* Find which rows should be sent to which remote ranks*/
6468   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6469   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6470   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6471   const PetscInt *ranges;
6472   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6473 
6474   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6475   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6476   for (k = rem; k < n1;) {
6477     PetscMPIInt owner;
6478     PetscInt    firstRow, lastRow;
6479 
6480     /* Locate a row range */
6481     firstRow = i1[k]; /* first row of this owner */
6482     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6483     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6484 
6485     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6486     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6487 
6488     /* All entries in [k,p) belong to this remote owner */
6489     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6490       PetscMPIInt *sendto2;
6491       PetscInt    *nentries2;
6492       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6493 
6494       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6495       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6496       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6497       PetscCall(PetscFree2(sendto, nentries));
6498       sendto   = sendto2;
6499       nentries = nentries2;
6500       maxNsend = maxNsend2;
6501     }
6502     sendto[nsend] = owner;
6503     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6504     nsend++;
6505     k = p;
6506   }
6507 
6508   /* Build 1st SF to know offsets on remote to send data */
6509   PetscSF      sf1;
6510   PetscInt     nroots = 1, nroots2 = 0;
6511   PetscInt     nleaves = nsend, nleaves2 = 0;
6512   PetscInt    *offsets;
6513   PetscSFNode *iremote;
6514 
6515   PetscCall(PetscSFCreate(comm, &sf1));
6516   PetscCall(PetscMalloc1(nsend, &iremote));
6517   PetscCall(PetscMalloc1(nsend, &offsets));
6518   for (k = 0; k < nsend; k++) {
6519     iremote[k].rank  = sendto[k];
6520     iremote[k].index = 0;
6521     nleaves2 += nentries[k];
6522     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6523   }
6524   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6525   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6526   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the offsets[] checks below would catch it */
6527   PetscCall(PetscSFDestroy(&sf1));
6528   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
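/*
  Illustration of the fetch-and-op above (hypothetical counts): suppose ranks 1 and 3 both send entries to
  rank 0, with nentries = 5 on rank 1 and nentries = 7 on rank 3. The MPI_SUM fetch-and-op leaves
  nroots2 = 12 on rank 0, while each sender receives the value of the counter before its own addition,
  e.g. offsets = 0 on rank 1 and offsets = 5 on rank 3 (or vice versa, since the update order is
  unspecified). These offsets become the root indices used to build sf2 below.
*/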
6529 
6530   /* Build 2nd SF to send remote COOs to their owner */
6531   PetscSF sf2;
6532   nroots  = nroots2;
6533   nleaves = nleaves2;
6534   PetscCall(PetscSFCreate(comm, &sf2));
6535   PetscCall(PetscSFSetFromOptions(sf2));
6536   PetscCall(PetscMalloc1(nleaves, &iremote));
6537   p = 0;
6538   for (k = 0; k < nsend; k++) {
6539     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6540     for (q = 0; q < nentries[k]; q++, p++) {
6541       iremote[p].rank = sendto[k];
6542       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6543     }
6544   }
6545   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6546 
6547   /* Send the remote COOs to their owner */
6548   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6549   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6550   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6551   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6552   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6553   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6554   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6555   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6556   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6557   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6558   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6559 
6560   PetscCall(PetscFree(offsets));
6561   PetscCall(PetscFree2(sendto, nentries));
6562 
6563   /* Sort received COOs by row along with the permutation array     */
6564   for (k = 0; k < n2; k++) perm2[k] = k;
6565   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6566 
6567   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6568   PetscCount *Cperm1;
6569   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6570   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6571   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6572   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6573 
6574   /* Support for HYPRE matrices, kind of a hack.
6575      Swap min column with diagonal so that diagonal values will go first */
6576   PetscBool hypre;
6577   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6578   if (hypre) {
6579     PetscInt *minj;
6580     PetscBT   hasdiag;
6581 
6582     PetscCall(PetscBTCreate(m, &hasdiag));
6583     PetscCall(PetscMalloc1(m, &minj));
6584     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6585     for (k = i1start; k < rem; k++) {
6586       if (j1[k] < cstart || j1[k] >= cend) continue;
6587       const PetscInt rindex = i1[k] - rstart;
6588       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6589       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6590     }
6591     for (k = 0; k < n2; k++) {
6592       if (j2[k] < cstart || j2[k] >= cend) continue;
6593       const PetscInt rindex = i2[k] - rstart;
6594       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6595       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6596     }
6597     for (k = i1start; k < rem; k++) {
6598       const PetscInt rindex = i1[k] - rstart;
6599       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6600       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6601       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6602     }
6603     for (k = 0; k < n2; k++) {
6604       const PetscInt rindex = i2[k] - rstart;
6605       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6606       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6607       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6608     }
6609     PetscCall(PetscBTDestroy(&hasdiag));
6610     PetscCall(PetscFree(minj));
6611   }
6612 
6613   /* Split local COOs and received COOs into diag/offdiag portions */
6614   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6615   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6616   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6617   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6618   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6619   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6620 
6621   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6622   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6623   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6624   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6625 
6626   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6627   PetscInt *Ai, *Bi;
6628   PetscInt *Aj, *Bj;
6629 
6630   PetscCall(PetscMalloc1(m + 1, &Ai));
6631   PetscCall(PetscMalloc1(m + 1, &Bi));
6632   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6633   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6634 
6635   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6636   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6637   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6638   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6639   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6640 
6641   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6642   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6643 
6644   /* Expand Ajmap1/Bjmap1 so that they are indexed by the nonzeros of A/B, since most  */
6645   /* nonzeros in A/B are expected to have local contributing entries                   */
6646   PetscInt    Annz = Ai[m];
6647   PetscInt    Bnnz = Bi[m];
6648   PetscCount *Ajmap1_new, *Bjmap1_new;
6649 
6650   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6651   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6652 
6653   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6654   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6655 
6656   PetscCall(PetscFree(Aimap1));
6657   PetscCall(PetscFree(Ajmap1));
6658   PetscCall(PetscFree(Bimap1));
6659   PetscCall(PetscFree(Bjmap1));
6660   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6661   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6662   PetscCall(PetscFree(perm1));
6663   PetscCall(PetscFree3(i2, j2, perm2));
6664 
6665   Ajmap1 = Ajmap1_new;
6666   Bjmap1 = Bjmap1_new;
6667 
6668   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6669   if (Annz < Annz1 + Annz2) {
6670     PetscInt *Aj_new;
6671     PetscCall(PetscMalloc1(Annz, &Aj_new));
6672     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6673     PetscCall(PetscFree(Aj));
6674     Aj = Aj_new;
6675   }
6676 
6677   if (Bnnz < Bnnz1 + Bnnz2) {
6678     PetscInt *Bj_new;
6679     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6680     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6681     PetscCall(PetscFree(Bj));
6682     Bj = Bj_new;
6683   }
6684 
6685   /* Create new submatrices for on-process and off-process coupling                  */
6686   PetscScalar     *Aa, *Ba;
6687   MatType          rtype;
6688   Mat_SeqAIJ      *a, *b;
6689   PetscObjectState state;
6690   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6691   PetscCall(PetscCalloc1(Bnnz, &Ba));
6692   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6693   if (cstart) {
6694     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6695   }
6696 
6697   PetscCall(MatGetRootType_Private(mat, &rtype));
6698 
6699   MatSeqXAIJGetOptions_Private(mpiaij->A);
6700   PetscCall(MatDestroy(&mpiaij->A));
6701   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6702   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6703   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6704 
6705   MatSeqXAIJGetOptions_Private(mpiaij->B);
6706   PetscCall(MatDestroy(&mpiaij->B));
6707   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6708   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6709   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6710 
6711   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6712   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6713   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6714   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6715 
6716   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6717   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6718   a->free_a  = PETSC_TRUE;
6719   a->free_ij = PETSC_TRUE;
6720   b->free_a  = PETSC_TRUE;
6721   b->free_ij = PETSC_TRUE;
6722   a->maxnz   = a->nz;
6723   b->maxnz   = b->nz;
6724 
6725   /* conversion must happen AFTER multiply setup */
6726   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6727   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6728   PetscCall(VecDestroy(&mpiaij->lvec));
6729   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6730 
6731   // Put the COO struct in a container and then attach that to the matrix
6732   PetscCall(PetscMalloc1(1, &coo));
6733   coo->n       = coo_n;
6734   coo->sf      = sf2;
6735   coo->sendlen = nleaves;
6736   coo->recvlen = nroots;
6737   coo->Annz    = Annz;
6738   coo->Bnnz    = Bnnz;
6739   coo->Annz2   = Annz2;
6740   coo->Bnnz2   = Bnnz2;
6741   coo->Atot1   = Atot1;
6742   coo->Atot2   = Atot2;
6743   coo->Btot1   = Btot1;
6744   coo->Btot2   = Btot2;
6745   coo->Ajmap1  = Ajmap1;
6746   coo->Aperm1  = Aperm1;
6747   coo->Bjmap1  = Bjmap1;
6748   coo->Bperm1  = Bperm1;
6749   coo->Aimap2  = Aimap2;
6750   coo->Ajmap2  = Ajmap2;
6751   coo->Aperm2  = Aperm2;
6752   coo->Bimap2  = Bimap2;
6753   coo->Bjmap2  = Bjmap2;
6754   coo->Bperm2  = Bperm2;
6755   coo->Cperm1  = Cperm1;
6756   // Allocate in preallocation. If not used, it has zero cost on host
6757   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6758   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6759   PetscCall(PetscContainerSetPointer(container, coo));
6760   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6761   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6762   PetscCall(PetscContainerDestroy(&container));
6763   PetscFunctionReturn(PETSC_SUCCESS);
6764 }
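/*
  A minimal usage sketch of the COO interface implemented above (illustrative user code; the sizes and
  values are made up). Note that, as seen above, this implementation sorts coo_i[]/coo_j[] in place.

    PetscInt    coo_i[] = {0, 0, 3};   // global row indices, repeated entries allowed
    PetscInt    coo_j[] = {0, 0, 5};   // global column indices
    PetscScalar v[]     = {1.0, 2.0, 3.0};

    PetscCall(MatSetPreallocationCOO(A, 3, coo_i, coo_j)); // A is a MATMPIAIJ with its sizes already set
    PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));          // entry (0,0) receives 1.0 + 2.0

  Subsequent calls to MatSetValuesCOO() with new values reuse the communication plan (the sf and the
  jmap/perm arrays) stored in the MatCOOStruct_MPIAIJ container attached above.
*/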
6765 
6766 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6767 {
6768   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6769   Mat                  A = mpiaij->A, B = mpiaij->B;
6770   PetscScalar         *Aa, *Ba;
6771   PetscScalar         *sendbuf, *recvbuf;
6772   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6773   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6774   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6775   const PetscCount    *Cperm1;
6776   PetscContainer       container;
6777   MatCOOStruct_MPIAIJ *coo;
6778 
6779   PetscFunctionBegin;
6780   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6781   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6782   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6783   sendbuf = coo->sendbuf;
6784   recvbuf = coo->recvbuf;
6785   Ajmap1  = coo->Ajmap1;
6786   Ajmap2  = coo->Ajmap2;
6787   Aimap2  = coo->Aimap2;
6788   Bjmap1  = coo->Bjmap1;
6789   Bjmap2  = coo->Bjmap2;
6790   Bimap2  = coo->Bimap2;
6791   Aperm1  = coo->Aperm1;
6792   Aperm2  = coo->Aperm2;
6793   Bperm1  = coo->Bperm1;
6794   Bperm2  = coo->Bperm2;
6795   Cperm1  = coo->Cperm1;
6796 
6797   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6798   PetscCall(MatSeqAIJGetArray(B, &Ba));
6799 
6800   /* Pack entries to be sent to remote */
6801   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6802 
6803   /* Send remote entries to their owner and overlap the communication with local computation */
6804   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6805   /* Add local entries to A and B */
6806   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6807     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6808     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6809     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6810   }
6811   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6812     PetscScalar sum = 0.0;
6813     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6814     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6815   }
6816   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6817 
6818   /* Add received remote entries to A and B */
6819   for (PetscCount i = 0; i < coo->Annz2; i++) {
6820     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6821   }
6822   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6823     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6824   }
6825   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6826   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6827   PetscFunctionReturn(PETSC_SUCCESS);
6828 }
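/*
  Illustration of the local accumulation above (hypothetical indices): if the 0-th nonzero of the diagonal
  block A was produced by the input COO entries at positions 4 and 9 of v[] (i.e. Ajmap1 = {0, 2, ...} and
  Aperm1 = {4, 9, ...}), then the first loop computes sum = v[4] + v[9] and, with imode == ADD_VALUES,
  Aa[0] += sum; gathering the repeated contributions into sum before touching Aa[] is the partial
  summation mentioned in the comment above.
*/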
6829 
6830 /*MC
6831    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6832 
6833    Options Database Keys:
6834 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6835 
6836    Level: beginner
6837 
6838    Notes:
6839    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6840     in this case the values associated with the rows and columns one passes in are set to zero
6841     in the matrix
6842 
6843     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6844     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6845 
6846 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6847 M*/
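/*
  A minimal creation sketch for a MATMPIAIJ matrix (illustrative user code; sizes and preallocation
  numbers are made up):

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
    PetscCall(MatSetType(A, MATMPIAIJ));                       // or -mat_type mpiaij with MatSetFromOptions()
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // ~5 nonzeros per row in the diagonal block, ~2 in the off-diagonal block
    // ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/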
6848 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6849 {
6850   Mat_MPIAIJ *b;
6851   PetscMPIInt size;
6852 
6853   PetscFunctionBegin;
6854   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6855 
6856   PetscCall(PetscNew(&b));
6857   B->data       = (void *)b;
6858   B->ops[0]     = MatOps_Values;
6859   B->assembled  = PETSC_FALSE;
6860   B->insertmode = NOT_SET_VALUES;
6861   b->size       = size;
6862 
6863   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6864 
6865   /* build cache for off array entries formed */
6866   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6867 
6868   b->donotstash  = PETSC_FALSE;
6869   b->colmap      = NULL;
6870   b->garray      = NULL;
6871   b->roworiented = PETSC_TRUE;
6872 
6873   /* stuff used for matrix vector multiply */
6874   b->lvec  = NULL;
6875   b->Mvctx = NULL;
6876 
6877   /* stuff for MatGetRow() */
6878   b->rowindices   = NULL;
6879   b->rowvalues    = NULL;
6880   b->getrowactive = PETSC_FALSE;
6881 
6882   /* flexible pointer used in CUSPARSE classes */
6883   b->spptr = NULL;
6884 
6885   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6886   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6887   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6888   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6889   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6890   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6891   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6892   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6893   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6894   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6895   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6896 #if defined(PETSC_HAVE_CUDA)
6897   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6898 #endif
6899 #if defined(PETSC_HAVE_HIP)
6900   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6901 #endif
6902 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6903   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6904 #endif
6905 #if defined(PETSC_HAVE_MKL_SPARSE)
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6907 #endif
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6912 #if defined(PETSC_HAVE_ELEMENTAL)
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6914 #endif
6915 #if defined(PETSC_HAVE_SCALAPACK)
6916   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6917 #endif
6918   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6919   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6920 #if defined(PETSC_HAVE_HYPRE)
6921   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6922   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6923 #endif
6924   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6925   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6928   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6929   PetscFunctionReturn(PETSC_SUCCESS);
6930 }
6931 
6932 /*@
6933   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6934   and "off-diagonal" part of the matrix in CSR format.
6935 
6936   Collective
6937 
6938   Input Parameters:
6939 + comm - MPI communicator
6940 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6941 . n    - This value should be the same as the local size used in creating the
6942          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6943          calculated if `N` is given). For square matrices `n` is almost always `m`.
6944 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6945 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6946 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6947 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6948 . a    - matrix values
6949 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6950 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6951 - oa   - matrix values
6952 
6953   Output Parameter:
6954 . mat - the matrix
6955 
6956   Level: advanced
6957 
6958   Notes:
6959   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6960   must free the arrays once the matrix has been destroyed and not before.
6961 
6962   The `i` and `j` indices are 0 based
6963 
6964   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6965 
6966   This sets local rows and cannot be used to set off-processor values.
6967 
6968   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6969   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6970   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6971   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6972   keep track of the underlying arrays. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6973   communication if it is known that only local entries will be set.
6974 
6975 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6976           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6977 @*/
6978 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6979 {
6980   Mat_MPIAIJ *maij;
6981 
6982   PetscFunctionBegin;
6983   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6984   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6985   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6986   PetscCall(MatCreate(comm, mat));
6987   PetscCall(MatSetSizes(*mat, m, n, M, N));
6988   PetscCall(MatSetType(*mat, MATMPIAIJ));
6989   maij = (Mat_MPIAIJ *)(*mat)->data;
6990 
6991   (*mat)->preallocated = PETSC_TRUE;
6992 
6993   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6994   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6995 
6996   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6997   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6998 
6999   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7000   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7001   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7002   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7003   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7004   PetscFunctionReturn(PETSC_SUCCESS);
7005 }
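/*
  Illustration of the split-array input expected above (hypothetical 2-process layout): for a 4x4 matrix
  with 2 rows per rank and rank 0 owning columns [0,2), a first local row of the form

    row 0:  a00  a01   .   a03

  is passed in the diagonal CSR as i = {0, 2, ...}, j = {0, 1, ...} (local column indices),
  a = {a00, a01, ...}, and in the off-diagonal CSR as oi = {0, 1, ...}, oj = {3, ...} (global column
  indices), oa = {a03, ...}.
*/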
7006 
7007 typedef struct {
7008   Mat       *mp;    /* intermediate products */
7009   PetscBool *mptmp; /* is the intermediate product temporary ? */
7010   PetscInt   cp;    /* number of intermediate products */
7011 
7012   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7013   PetscInt    *startsj_s, *startsj_r;
7014   PetscScalar *bufa;
7015   Mat          P_oth;
7016 
7017   /* may take advantage of merging product->B */
7018   Mat Bloc; /* B-local by merging diag and off-diag */
7019 
7020   /* cusparse does not support splitting between symbolic and numeric phases.
7021      When api_user is true, we don't need to update the numerical values
7022      of the temporary storage */
7023   PetscBool reusesym;
7024 
7025   /* support for COO values insertion */
7026   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7027   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7028   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7029   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7030   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7031   PetscMemType mtype;
7032 
7033   /* customization */
7034   PetscBool abmerge;
7035   PetscBool P_oth_bind;
7036 } MatMatMPIAIJBACKEND;
7037 
7038 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7039 {
7040   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7041   PetscInt             i;
7042 
7043   PetscFunctionBegin;
7044   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7045   PetscCall(PetscFree(mmdata->bufa));
7046   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7047   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7048   PetscCall(MatDestroy(&mmdata->P_oth));
7049   PetscCall(MatDestroy(&mmdata->Bloc));
7050   PetscCall(PetscSFDestroy(&mmdata->sf));
7051   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7052   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7053   PetscCall(PetscFree(mmdata->own[0]));
7054   PetscCall(PetscFree(mmdata->own));
7055   PetscCall(PetscFree(mmdata->off[0]));
7056   PetscCall(PetscFree(mmdata->off));
7057   PetscCall(PetscFree(mmdata));
7058   PetscFunctionReturn(PETSC_SUCCESS);
7059 }
7060 
7061 /* Copy selected n entries with indices in idx[] of A to v[].
7062    If idx is NULL, copy the whole data array of A to v[]
7063  */
7064 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7065 {
7066   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7067 
7068   PetscFunctionBegin;
7069   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7070   if (f) {
7071     PetscCall((*f)(A, n, idx, v));
7072   } else {
7073     const PetscScalar *vv;
7074 
7075     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7076     if (n && idx) {
7077       PetscScalar    *w  = v;
7078       const PetscInt *oi = idx;
7079       PetscInt        j;
7080 
7081       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7082     } else {
7083       PetscCall(PetscArraycpy(v, vv, n));
7084     }
7085     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7086   }
7087   PetscFunctionReturn(PETSC_SUCCESS);
7088 }
7089 
7090 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7091 {
7092   MatMatMPIAIJBACKEND *mmdata;
7093   PetscInt             i, n_d, n_o;
7094 
7095   PetscFunctionBegin;
7096   MatCheckProduct(C, 1);
7097   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7098   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7099   if (!mmdata->reusesym) { /* update temporary matrices */
7100     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7101     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7102   }
7103   mmdata->reusesym = PETSC_FALSE;
7104 
7105   for (i = 0; i < mmdata->cp; i++) {
7106     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7107     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7108   }
7109   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7110     PetscInt noff;
7111 
7112     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7113     if (mmdata->mptmp[i]) continue;
7114     if (noff) {
7115       PetscInt nown;
7116 
7117       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7118       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7119       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7120       n_o += noff;
7121       n_d += nown;
7122     } else {
7123       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7124 
7125       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7126       n_d += mm->nz;
7127     }
7128   }
7129   if (mmdata->hasoffproc) { /* offprocess insertion */
7130     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7131     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7132   }
7133   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7134   PetscFunctionReturn(PETSC_SUCCESS);
7135 }
7136 
7137 /* Support for Pt * A, A * P, or Pt * A * P */
7138 #define MAX_NUMBER_INTERMEDIATE 4
7139 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7140 {
7141   Mat_Product           *product = C->product;
7142   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7143   Mat_MPIAIJ            *a, *p;
7144   MatMatMPIAIJBACKEND   *mmdata;
7145   ISLocalToGlobalMapping P_oth_l2g = NULL;
7146   IS                     glob      = NULL;
7147   const char            *prefix;
7148   char                   pprefix[256];
7149   const PetscInt        *globidx, *P_oth_idx;
7150   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7151   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7152   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7153                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7154                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7155   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7156 
7157   MatProductType ptype;
7158   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7159   PetscMPIInt    size;
7160 
7161   PetscFunctionBegin;
7162   MatCheckProduct(C, 1);
7163   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7164   ptype = product->type;
7165   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7166     ptype                                          = MATPRODUCT_AB;
7167     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7168   }
7169   switch (ptype) {
7170   case MATPRODUCT_AB:
7171     A          = product->A;
7172     P          = product->B;
7173     m          = A->rmap->n;
7174     n          = P->cmap->n;
7175     M          = A->rmap->N;
7176     N          = P->cmap->N;
7177     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7178     break;
7179   case MATPRODUCT_AtB:
7180     P          = product->A;
7181     A          = product->B;
7182     m          = P->cmap->n;
7183     n          = A->cmap->n;
7184     M          = P->cmap->N;
7185     N          = A->cmap->N;
7186     hasoffproc = PETSC_TRUE;
7187     break;
7188   case MATPRODUCT_PtAP:
7189     A          = product->A;
7190     P          = product->B;
7191     m          = P->cmap->n;
7192     n          = P->cmap->n;
7193     M          = P->cmap->N;
7194     N          = P->cmap->N;
7195     hasoffproc = PETSC_TRUE;
7196     break;
7197   default:
7198     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7199   }
7200   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7201   if (size == 1) hasoffproc = PETSC_FALSE;
7202 
7203   /* defaults */
7204   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7205     mp[i]    = NULL;
7206     mptmp[i] = PETSC_FALSE;
7207     rmapt[i] = -1;
7208     cmapt[i] = -1;
7209     rmapa[i] = NULL;
7210     cmapa[i] = NULL;
7211   }
7212 
7213   /* customization */
7214   PetscCall(PetscNew(&mmdata));
7215   mmdata->reusesym = product->api_user;
7216   if (ptype == MATPRODUCT_AB) {
7217     if (product->api_user) {
7218       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7219       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7220       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7221       PetscOptionsEnd();
7222     } else {
7223       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7224       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7225       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7226       PetscOptionsEnd();
7227     }
7228   } else if (ptype == MATPRODUCT_PtAP) {
7229     if (product->api_user) {
7230       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7231       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7232       PetscOptionsEnd();
7233     } else {
7234       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7235       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7236       PetscOptionsEnd();
7237     }
7238   }
7239   a = (Mat_MPIAIJ *)A->data;
7240   p = (Mat_MPIAIJ *)P->data;
7241   PetscCall(MatSetSizes(C, m, n, M, N));
7242   PetscCall(PetscLayoutSetUp(C->rmap));
7243   PetscCall(PetscLayoutSetUp(C->cmap));
7244   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7245   PetscCall(MatGetOptionsPrefix(C, &prefix));
7246 
7247   cp = 0;
7248   switch (ptype) {
7249   case MATPRODUCT_AB: /* A * P */
7250     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7251 
7252     /* A_diag * P_local (merged or not) */
7253     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7254       /* P is product->B */
7255       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7256       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7257       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7258       PetscCall(MatProductSetFill(mp[cp], product->fill));
7259       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7260       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7261       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7262       mp[cp]->product->api_user = product->api_user;
7263       PetscCall(MatProductSetFromOptions(mp[cp]));
7264       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7265       PetscCall(ISGetIndices(glob, &globidx));
7266       rmapt[cp] = 1;
7267       cmapt[cp] = 2;
7268       cmapa[cp] = globidx;
7269       mptmp[cp] = PETSC_FALSE;
7270       cp++;
7271     } else { /* A_diag * P_diag and A_diag * P_off */
7272       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7273       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7274       PetscCall(MatProductSetFill(mp[cp], product->fill));
7275       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7276       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7277       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7278       mp[cp]->product->api_user = product->api_user;
7279       PetscCall(MatProductSetFromOptions(mp[cp]));
7280       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7281       rmapt[cp] = 1;
7282       cmapt[cp] = 1;
7283       mptmp[cp] = PETSC_FALSE;
7284       cp++;
7285       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7286       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7287       PetscCall(MatProductSetFill(mp[cp], product->fill));
7288       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7289       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7290       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7291       mp[cp]->product->api_user = product->api_user;
7292       PetscCall(MatProductSetFromOptions(mp[cp]));
7293       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7294       rmapt[cp] = 1;
7295       cmapt[cp] = 2;
7296       cmapa[cp] = p->garray;
7297       mptmp[cp] = PETSC_FALSE;
7298       cp++;
7299     }
7300 
7301     /* A_off * P_other */
7302     if (mmdata->P_oth) {
7303       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7304       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7305       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7306       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7307       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7308       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7309       PetscCall(MatProductSetFill(mp[cp], product->fill));
7310       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7311       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7312       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7313       mp[cp]->product->api_user = product->api_user;
7314       PetscCall(MatProductSetFromOptions(mp[cp]));
7315       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7316       rmapt[cp] = 1;
7317       cmapt[cp] = 2;
7318       cmapa[cp] = P_oth_idx;
7319       mptmp[cp] = PETSC_FALSE;
7320       cp++;
7321     }
7322     break;
7323 
7324   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7325     /* A is product->B */
7326     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7327     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7328       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7329       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7330       PetscCall(MatProductSetFill(mp[cp], product->fill));
7331       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7332       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7333       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7334       mp[cp]->product->api_user = product->api_user;
7335       PetscCall(MatProductSetFromOptions(mp[cp]));
7336       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7337       PetscCall(ISGetIndices(glob, &globidx));
7338       rmapt[cp] = 2;
7339       rmapa[cp] = globidx;
7340       cmapt[cp] = 2;
7341       cmapa[cp] = globidx;
7342       mptmp[cp] = PETSC_FALSE;
7343       cp++;
7344     } else {
7345       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7346       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7347       PetscCall(MatProductSetFill(mp[cp], product->fill));
7348       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7349       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7350       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7351       mp[cp]->product->api_user = product->api_user;
7352       PetscCall(MatProductSetFromOptions(mp[cp]));
7353       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7354       PetscCall(ISGetIndices(glob, &globidx));
7355       rmapt[cp] = 1;
7356       cmapt[cp] = 2;
7357       cmapa[cp] = globidx;
7358       mptmp[cp] = PETSC_FALSE;
7359       cp++;
7360       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7361       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7362       PetscCall(MatProductSetFill(mp[cp], product->fill));
7363       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7364       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7365       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7366       mp[cp]->product->api_user = product->api_user;
7367       PetscCall(MatProductSetFromOptions(mp[cp]));
7368       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7369       rmapt[cp] = 2;
7370       rmapa[cp] = p->garray;
7371       cmapt[cp] = 2;
7372       cmapa[cp] = globidx;
7373       mptmp[cp] = PETSC_FALSE;
7374       cp++;
7375     }
7376     break;
7377   case MATPRODUCT_PtAP:
7378     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7379     /* P is product->B */
7380     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7381     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7382     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7383     PetscCall(MatProductSetFill(mp[cp], product->fill));
7384     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7385     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7386     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7387     mp[cp]->product->api_user = product->api_user;
7388     PetscCall(MatProductSetFromOptions(mp[cp]));
7389     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7390     PetscCall(ISGetIndices(glob, &globidx));
7391     rmapt[cp] = 2;
7392     rmapa[cp] = globidx;
7393     cmapt[cp] = 2;
7394     cmapa[cp] = globidx;
7395     mptmp[cp] = PETSC_FALSE;
7396     cp++;
7397     if (mmdata->P_oth) {
7398       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7399       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7400       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7401       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7402       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7403       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7404       PetscCall(MatProductSetFill(mp[cp], product->fill));
7405       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7406       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7407       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7408       mp[cp]->product->api_user = product->api_user;
7409       PetscCall(MatProductSetFromOptions(mp[cp]));
7410       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7411       mptmp[cp] = PETSC_TRUE;
7412       cp++;
7413       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7414       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7415       PetscCall(MatProductSetFill(mp[cp], product->fill));
7416       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7417       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7418       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7419       mp[cp]->product->api_user = product->api_user;
7420       PetscCall(MatProductSetFromOptions(mp[cp]));
7421       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7422       rmapt[cp] = 2;
7423       rmapa[cp] = globidx;
7424       cmapt[cp] = 2;
7425       cmapa[cp] = P_oth_idx;
7426       mptmp[cp] = PETSC_FALSE;
7427       cp++;
7428     }
7429     break;
7430   default:
7431     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7432   }
7433   /* sanity check */
7434   if (size > 1)
7435     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7436 
7437   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7438   for (i = 0; i < cp; i++) {
7439     mmdata->mp[i]    = mp[i];
7440     mmdata->mptmp[i] = mptmp[i];
7441   }
7442   mmdata->cp             = cp;
7443   C->product->data       = mmdata;
7444   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7445   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7446 
7447   /* memory type */
7448   mmdata->mtype = PETSC_MEMTYPE_HOST;
7449   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7450   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7451   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7452   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7453   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7454   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7455 
7456   /* prepare coo coordinates for values insertion */
7457 
7458   /* count total nonzeros of those intermediate seqaij Mats
7459     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7460     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7461     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7462   */
7463   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7464     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7465     if (mptmp[cp]) continue;
7466     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7467       const PetscInt *rmap = rmapa[cp];
7468       const PetscInt  mr   = mp[cp]->rmap->n;
7469       const PetscInt  rs   = C->rmap->rstart;
7470       const PetscInt  re   = C->rmap->rend;
7471       const PetscInt *ii   = mm->i;
7472       for (i = 0; i < mr; i++) {
7473         const PetscInt gr = rmap[i];
7474         const PetscInt nz = ii[i + 1] - ii[i];
7475         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7476         else ncoo_oown += nz;                  /* this row is local */
7477       }
7478     } else ncoo_d += mm->nz;
7479   }
7480 
7481   /*
7482     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7483 
7484     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7485 
7486     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7487 
7488     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes
7489     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7490     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7491 
7492     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7493     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7494   */
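  /* A small worked example (illustrative numbers only, not from any particular run): suppose cp = 2, where
     mp[0] has consecutive rows (rmapt == 1) contributing ncoo_d = 3 nonzeros, and mp[1] has sparse rows
     (rmapt == 2) with ncoo_o = 2 nonzeros destined for other ranks and ncoo_oown = 4 staying local. If remote
     ranks send us ncoo2 = 5 entries, then ncoo = 3 + 4 + 5 = 12, off[1]-off[0] = 0, off[2]-off[1] = 2,
     own[2]-own[1] = 4, and coo_i/j/v hold the 3 + 4 locally produced entries first, followed by the 5 received ones. */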
7495   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7496   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7497 
7498   /* gather (i,j) of nonzeros inserted by remote procs */
7499   if (hasoffproc) {
7500     PetscSF  msf;
7501     PetscInt ncoo2, *coo_i2, *coo_j2;
7502 
7503     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7504     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7505     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7506 
7507     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7508       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7509       PetscInt   *idxoff = mmdata->off[cp];
7510       PetscInt   *idxown = mmdata->own[cp];
7511       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7512         const PetscInt *rmap = rmapa[cp];
7513         const PetscInt *cmap = cmapa[cp];
7514         const PetscInt *ii   = mm->i;
7515         PetscInt       *coi  = coo_i + ncoo_o;
7516         PetscInt       *coj  = coo_j + ncoo_o;
7517         const PetscInt  mr   = mp[cp]->rmap->n;
7518         const PetscInt  rs   = C->rmap->rstart;
7519         const PetscInt  re   = C->rmap->rend;
7520         const PetscInt  cs   = C->cmap->rstart;
7521         for (i = 0; i < mr; i++) {
7522           const PetscInt *jj = mm->j + ii[i];
7523           const PetscInt  gr = rmap[i];
7524           const PetscInt  nz = ii[i + 1] - ii[i];
7525           if (gr < rs || gr >= re) { /* this is an offproc row */
7526             for (j = ii[i]; j < ii[i + 1]; j++) {
7527               *coi++    = gr;
7528               *idxoff++ = j;
7529             }
7530             if (!cmapt[cp]) { /* already global */
7531               for (j = 0; j < nz; j++) *coj++ = jj[j];
7532             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7533               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7534             } else { /* offdiag */
7535               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7536             }
7537             ncoo_o += nz;
7538           } else { /* this is a local row */
7539             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7540           }
7541         }
7542       }
7543       mmdata->off[cp + 1] = idxoff;
7544       mmdata->own[cp + 1] = idxown;
7545     }
7546 
7547     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7548     PetscInt incoo_o;
7549     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7550     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7551     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7552     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7553     ncoo = ncoo_d + ncoo_oown + ncoo2;
7554     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7555     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7556     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7557     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7558     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7559     PetscCall(PetscFree2(coo_i, coo_j));
7560     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7561     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7562     coo_i = coo_i2;
7563     coo_j = coo_j2;
7564   } else { /* no offproc values insertion */
7565     ncoo = ncoo_d;
7566     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7567 
7568     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7569     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7570     PetscCall(PetscSFSetUp(mmdata->sf));
7571   }
7572   mmdata->hasoffproc = hasoffproc;
7573 
7574   /* gather (i,j) of nonzeros inserted locally */
7575   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7576     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7577     PetscInt       *coi  = coo_i + ncoo_d;
7578     PetscInt       *coj  = coo_j + ncoo_d;
7579     const PetscInt *jj   = mm->j;
7580     const PetscInt *ii   = mm->i;
7581     const PetscInt *cmap = cmapa[cp];
7582     const PetscInt *rmap = rmapa[cp];
7583     const PetscInt  mr   = mp[cp]->rmap->n;
7584     const PetscInt  rs   = C->rmap->rstart;
7585     const PetscInt  re   = C->rmap->rend;
7586     const PetscInt  cs   = C->cmap->rstart;
7587 
7588     if (mptmp[cp]) continue;
7589     if (rmapt[cp] == 1) { /* consecutive rows */
7590       /* fill coo_i */
7591       for (i = 0; i < mr; i++) {
7592         const PetscInt gr = i + rs;
7593         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7594       }
7595       /* fill coo_j */
7596       if (!cmapt[cp]) { /* type-0, already global */
7597         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7598       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7599         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7600       } else {                                            /* type-2, local to global for sparse columns */
7601         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7602       }
7603       ncoo_d += mm->nz;
7604     } else if (rmapt[cp] == 2) { /* sparse rows */
7605       for (i = 0; i < mr; i++) {
7606         const PetscInt *jj = mm->j + ii[i];
7607         const PetscInt  gr = rmap[i];
7608         const PetscInt  nz = ii[i + 1] - ii[i];
7609         if (gr >= rs && gr < re) { /* local rows */
7610           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7611           if (!cmapt[cp]) { /* type-0, already global */
7612             for (j = 0; j < nz; j++) *coj++ = jj[j];
7613           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7614             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7615           } else { /* type-2, local to global for sparse columns */
7616             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7617           }
7618           ncoo_d += nz;
7619         }
7620       }
7621     }
7622   }
7623   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7624   PetscCall(ISDestroy(&glob));
7625   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7626   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7627   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7628   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7629 
7630   /* set block sizes */
7631   A = product->A;
7632   P = product->B;
7633   switch (ptype) {
7634   case MATPRODUCT_PtAP:
7635     if (P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7636     break;
7637   case MATPRODUCT_RARt:
7638     if (P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7639     break;
7640   case MATPRODUCT_ABC:
7641     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7642     break;
7643   case MATPRODUCT_AB:
7644     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7645     break;
7646   case MATPRODUCT_AtB:
7647     if (A->cmap->bs > 1 || P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7648     break;
7649   case MATPRODUCT_ABt:
7650     if (A->rmap->bs > 1 || P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7651     break;
7652   default:
7653     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7654   }
7655 
7656   /* preallocate with COO data */
7657   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7658   PetscCall(PetscFree2(coo_i, coo_j));
7659   PetscFunctionReturn(PETSC_SUCCESS);
7660 }
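
/*
   For context, a minimal sketch (an assumption about the matching numeric phase, not a verbatim copy of
   MatProductNumeric_MPIAIJBACKEND, which is set above as C->ops->productnumeric) of how the data prepared
   here is consumed: each backend product mp[] is recomputed numerically, its values are packed into
   mmdata->coo_v following the coo_i/coo_j layout established above (local entries first, gathered remote
   entries last), and the result is inserted into C with

     PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));

   relying on the preallocation performed by MatSetPreallocationCOO() at the end of this routine.
*/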
7661 
7662 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7663 {
7664   Mat_Product *product = mat->product;
7665 #if defined(PETSC_HAVE_DEVICE)
7666   PetscBool match  = PETSC_FALSE;
7667   PetscBool usecpu = PETSC_FALSE;
7668 #else
7669   PetscBool match = PETSC_TRUE;
7670 #endif
7671 
7672   PetscFunctionBegin;
7673   MatCheckProduct(mat, 1);
7674 #if defined(PETSC_HAVE_DEVICE)
7675   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7676   if (match) { /* we can always fall back to the CPU if requested */
7677     switch (product->type) {
7678     case MATPRODUCT_AB:
7679       if (product->api_user) {
7680         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7681         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7682         PetscOptionsEnd();
7683       } else {
7684         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7685         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7686         PetscOptionsEnd();
7687       }
7688       break;
7689     case MATPRODUCT_AtB:
7690       if (product->api_user) {
7691         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7692         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7693         PetscOptionsEnd();
7694       } else {
7695         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7696         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7697         PetscOptionsEnd();
7698       }
7699       break;
7700     case MATPRODUCT_PtAP:
7701       if (product->api_user) {
7702         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7703         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7704         PetscOptionsEnd();
7705       } else {
7706         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7707         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7708         PetscOptionsEnd();
7709       }
7710       break;
7711     default:
7712       break;
7713     }
7714     match = (PetscBool)!usecpu;
7715   }
7716 #endif
7717   if (match) {
7718     switch (product->type) {
7719     case MATPRODUCT_AB:
7720     case MATPRODUCT_AtB:
7721     case MATPRODUCT_PtAP:
7722       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7723       break;
7724     default:
7725       break;
7726     }
7727   }
7728   /* fallback to MPIAIJ ops */
7729   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7730   PetscFunctionReturn(PETSC_SUCCESS);
7731 }
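
/*
   A usage sketch (hypothetical user code; A and P are assumed to be assembled MATMPIAIJ matrices of a device
   subtype such as MATMPIAIJCUSPARSE) showing how the backend path selected above is reached through the public
   MatProduct API; the -mat*_backend_cpu options probed in this routine switch the product back to the host code:

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C)); // may pick MatProductSymbolic_MPIAIJBACKEND
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));
*/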
7732 
7733 /*
7734    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7735 
7736    n - the number of block indices in cc[]
7737    cc - the block indices (must be large enough to contain the indices)
7738 */
7739 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7740 {
7741   PetscInt        cnt = -1, nidx, j;
7742   const PetscInt *idx;
7743 
7744   PetscFunctionBegin;
7745   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7746   if (nidx) {
7747     cnt     = 0;
7748     cc[cnt] = idx[0] / bs;
7749     for (j = 1; j < nidx; j++) {
7750       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7751     }
7752   }
7753   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7754   *n = cnt + 1;
7755   PetscFunctionReturn(PETSC_SUCCESS);
7756 }
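
/*
   Example (illustrative): with bs = 2 and a row whose (sorted) column indices are {0, 1, 4, 5, 7},
   MatCollapseRow() returns n = 3 and cc = {0, 2, 3}, i.e. one entry per block column touched by the row.
*/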
7757 
7758 /*
7759     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7760 
7761     ncollapsed - the number of block indices
7762     collapsed - the block indices (on return, points into one of the provided workspaces)
7763 */
7764 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7765 {
7766   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7767 
7768   PetscFunctionBegin;
7769   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7770   for (i = start + 1; i < start + bs; i++) {
7771     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7772     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7773     cprevtmp = cprev;
7774     cprev    = merged;
7775     merged   = cprevtmp;
7776   }
7777   *ncollapsed = nprev;
7778   if (collapsed) *collapsed = cprev;
7779   PetscFunctionReturn(PETSC_SUCCESS);
7780 }
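
/*
   Example (illustrative): with bs = 2, if the two scalar rows of a block row collapse (via MatCollapseRow())
   to block indices {0, 2} and {2, 3} respectively, the merge yields ncollapsed = 3 and collapsed = {0, 2, 3}.
*/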
7781 
7782 /*
7783  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7784 
7785  Input Parameters:
7786  + Amat - matrix
7787  . symmetrize - make the result symmetric
7788  - scale - scale with diagonal
7789 
7790  Output Parameter:
7791  . a_Gmat - output scalar graph with entries >= 0
7792 
7793 */
7794 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7795 {
7796   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7797   MPI_Comm  comm;
7798   Mat       Gmat;
7799   PetscBool ismpiaij, isseqaij;
7800   Mat       a, b, c;
7801   MatType   jtype;
7802 
7803   PetscFunctionBegin;
7804   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7805   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7806   PetscCall(MatGetSize(Amat, &MM, &NN));
7807   PetscCall(MatGetBlockSize(Amat, &bs));
7808   nloc = (Iend - Istart) / bs;
7809 
7810   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7811   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7812   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7813 
7814   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7815   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class can provide a fast
7816      implementation */
7817   if (bs > 1) {
7818     PetscCall(MatGetType(Amat, &jtype));
7819     PetscCall(MatCreate(comm, &Gmat));
7820     PetscCall(MatSetType(Gmat, jtype));
7821     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7822     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7823     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7824       PetscInt  *d_nnz, *o_nnz;
7825       MatScalar *aa, val, *AA;
7826       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7827 
7828       if (isseqaij) {
7829         a = Amat;
7830         b = NULL;
7831       } else {
7832         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7833         a             = d->A;
7834         b             = d->B;
7835       }
7836       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7837       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7838       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7839         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7840         const PetscInt *cols1, *cols2;
7841 
7842         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7843           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7844           nnz[brow / bs] = nc2 / bs;
7845           if (nc2 % bs) ok = 0;
7846           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7847           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7848             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7849             if (nc1 != nc2) ok = 0;
7850             else {
7851               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7852                 if (cols1[jj] != cols2[jj]) ok = 0;
7853                 if (cols1[jj] % bs != jj % bs) ok = 0;
7854               }
7855             }
7856             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7857           }
7858           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7859           if (!ok) {
7860             PetscCall(PetscFree2(d_nnz, o_nnz));
7861             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7862             goto old_bs;
7863           }
7864         }
7865       }
7866       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7867       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7868       PetscCall(PetscFree2(d_nnz, o_nnz));
7869       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7870       // diag
7871       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7872         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7873 
7874         ai = aseq->i;
7875         n  = ai[brow + 1] - ai[brow];
7876         aj = aseq->j + ai[brow];
7877         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7878           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7879           val        = 0;
7880           if (index_size == 0) {
7881             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7882               aa = aseq->a + ai[brow + ii] + k;
7883               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7884                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7885               }
7886             }
7887           } else {                                            // use (index,index) value if provided
7888             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7889               PetscInt ii = index[iii];
7890               aa          = aseq->a + ai[brow + ii] + k;
7891               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7892                 PetscInt jj = index[jjj];
7893                 val += PetscAbs(PetscRealPart(aa[jj]));
7894               }
7895             }
7896           }
7897           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7898           AA[k / bs] = val;
7899         }
7900         grow = Istart / bs + brow / bs;
7901         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7902       }
7903       // off-diag
7904       if (ismpiaij) {
7905         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7906         const PetscScalar *vals;
7907         const PetscInt    *cols, *garray = aij->garray;
7908 
7909         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7910         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7911           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7912           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7913             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7914             AA[k / bs] = 0;
7915             AJ[cidx]   = garray[cols[k]] / bs;
7916           }
7917           nc = ncols / bs;
7918           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7919           if (index_size == 0) {
7920             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7921               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7922               for (PetscInt k = 0; k < ncols; k += bs) {
7923                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7924                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7925                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7926                 }
7927               }
7928               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7929             }
7930           } else {                                            // use (index,index) value if provided
7931             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7932               PetscInt ii = index[iii];
7933               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7934               for (PetscInt k = 0; k < ncols; k += bs) {
7935                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7936                   PetscInt jj = index[jjj];
7937                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7938                 }
7939               }
7940               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7941             }
7942           }
7943           grow = Istart / bs + brow / bs;
7944           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7945         }
7946       }
7947       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7948       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7949       PetscCall(PetscFree2(AA, AJ));
7950     } else {
7951       const PetscScalar *vals;
7952       const PetscInt    *idx;
7953       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7954     old_bs:
7955       /*
7956        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7957        */
7958       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7959       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7960       if (isseqaij) {
7961         PetscInt max_d_nnz;
7962 
7963         /*
7964          Determine exact preallocation count for (sequential) scalar matrix
7965          */
7966         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7967         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7968         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7969         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7970         PetscCall(PetscFree3(w0, w1, w2));
7971       } else if (ismpiaij) {
7972         Mat             Daij, Oaij;
7973         const PetscInt *garray;
7974         PetscInt        max_d_nnz;
7975 
7976         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7977         /*
7978          Determine exact preallocation count for diagonal block portion of scalar matrix
7979          */
7980         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7981         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7982         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7983         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7984         PetscCall(PetscFree3(w0, w1, w2));
7985         /*
7986          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7987          */
7988         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7989           o_nnz[jj] = 0;
7990           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7991             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7992             o_nnz[jj] += ncols;
7993             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7994           }
7995           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7996         }
7997       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7998       /* get scalar copy (norms) of matrix */
7999       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8000       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8001       PetscCall(PetscFree2(d_nnz, o_nnz));
8002       for (Ii = Istart; Ii < Iend; Ii++) {
8003         PetscInt dest_row = Ii / bs;
8004 
8005         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8006         for (jj = 0; jj < ncols; jj++) {
8007           PetscInt    dest_col = idx[jj] / bs;
8008           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8009 
8010           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8011         }
8012         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8013       }
8014       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8015       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8016     }
8017   } else {
8018     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8019     else {
8020       Gmat = Amat;
8021       PetscCall(PetscObjectReference((PetscObject)Gmat));
8022     }
8023     if (isseqaij) {
8024       a = Gmat;
8025       b = NULL;
8026     } else {
8027       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8028       a             = d->A;
8029       b             = d->B;
8030     }
8031     if (filter >= 0 || scale) {
8032       /* take absolute value of each entry */
8033       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8034         MatInfo      info;
8035         PetscScalar *avals;
8036 
8037         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8038         PetscCall(MatSeqAIJGetArray(c, &avals));
8039         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8040         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8041       }
8042     }
8043   }
8044   if (symmetrize) {
8045     PetscBool isset, issym;
8046 
8047     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8048     if (!isset || !issym) {
8049       Mat matTrans;
8050 
8051       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8052       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8053       PetscCall(MatDestroy(&matTrans));
8054     }
8055     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8056   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8057   if (scale) {
8058     /* symmetrically scale Gmat so that all diagonal values are 1 or -1 */
8059     Vec diag;
8060 
8061     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8062     PetscCall(MatGetDiagonal(Gmat, diag));
8063     PetscCall(VecReciprocal(diag));
8064     PetscCall(VecSqrtAbs(diag));
8065     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8066     PetscCall(VecDestroy(&diag));
8067   }
8068   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8069   if (filter >= 0) {
8070     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8071     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8072   }
8073   *a_Gmat = Gmat;
8074   PetscFunctionReturn(PETSC_SUCCESS);
8075 }
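
/*
   A usage sketch (hypothetical caller, e.g. an aggregation-based coarsening algorithm): build a symmetrized,
   diagonally scaled scalar graph from a blocked matrix A, using all rows/columns of each block
   (index_size = 0) and dropping small entries (below 0.01) after scaling:

     Mat G;
     PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &G));
     // ... use G, e.g. to compute aggregates ...
     PetscCall(MatDestroy(&G));
*/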
8076 
8077 /*
8078     Special version for direct calls from Fortran
8079 */
8080 
8081 /* Change these macros so can be used in void function */
8082 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8083 #undef PetscCall
8084 #define PetscCall(...) \
8085   do { \
8086     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8087     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8088       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8089       return; \
8090     } \
8091   } while (0)
8092 
8093 #undef SETERRQ
8094 #define SETERRQ(comm, ierr, ...) \
8095   do { \
8096     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8097     return; \
8098   } while (0)
8099 
8100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8101   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8103   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8104 #else
8105 #endif
8106 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8107 {
8108   Mat         mat = *mmat;
8109   PetscInt    m = *mm, n = *mn;
8110   InsertMode  addv = *maddv;
8111   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8112   PetscScalar value;
8113 
8114   MatCheckPreallocated(mat, 1);
8115   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8116   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8117   {
8118     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8119     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8120     PetscBool roworiented = aij->roworiented;
8121 
8122     /* Some Variables required in the macro */
8123     Mat         A     = aij->A;
8124     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8125     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8126     MatScalar  *aa;
8127     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8128     Mat         B                 = aij->B;
8129     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8130     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8131     MatScalar  *ba;
8132     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8133      * cannot use "#if defined" inside a macro. */
8134     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8135 
8136     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8137     PetscInt   nonew = a->nonew;
8138     MatScalar *ap1, *ap2;
8139 
8140     PetscFunctionBegin;
8141     PetscCall(MatSeqAIJGetArray(A, &aa));
8142     PetscCall(MatSeqAIJGetArray(B, &ba));
8143     for (i = 0; i < m; i++) {
8144       if (im[i] < 0) continue;
8145       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8146       if (im[i] >= rstart && im[i] < rend) {
8147         row      = im[i] - rstart;
8148         lastcol1 = -1;
8149         rp1      = aj + ai[row];
8150         ap1      = aa + ai[row];
8151         rmax1    = aimax[row];
8152         nrow1    = ailen[row];
8153         low1     = 0;
8154         high1    = nrow1;
8155         lastcol2 = -1;
8156         rp2      = bj + bi[row];
8157         ap2      = ba + bi[row];
8158         rmax2    = bimax[row];
8159         nrow2    = bilen[row];
8160         low2     = 0;
8161         high2    = nrow2;
8162 
8163         for (j = 0; j < n; j++) {
8164           if (roworiented) value = v[i * n + j];
8165           else value = v[i + j * m];
8166           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8167           if (in[j] >= cstart && in[j] < cend) {
8168             col = in[j] - cstart;
8169             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8170           } else if (in[j] < 0) continue;
8171           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8172             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8173           } else {
8174             if (mat->was_assembled) {
8175               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8176 #if defined(PETSC_USE_CTABLE)
8177               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8178               col--;
8179 #else
8180               col = aij->colmap[in[j]] - 1;
8181 #endif
8182               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8183                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8184                 col = in[j];
8185                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8186                 B        = aij->B;
8187                 b        = (Mat_SeqAIJ *)B->data;
8188                 bimax    = b->imax;
8189                 bi       = b->i;
8190                 bilen    = b->ilen;
8191                 bj       = b->j;
8192                 rp2      = bj + bi[row];
8193                 ap2      = ba + bi[row];
8194                 rmax2    = bimax[row];
8195                 nrow2    = bilen[row];
8196                 low2     = 0;
8197                 high2    = nrow2;
8198                 bm       = aij->B->rmap->n;
8199                 ba       = b->a;
8200                 inserted = PETSC_FALSE;
8201               }
8202             } else col = in[j];
8203             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8204           }
8205         }
8206       } else if (!aij->donotstash) {
8207         if (roworiented) {
8208           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8209         } else {
8210           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8211         }
8212       }
8213     }
8214     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8215     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8216   }
8217   PetscFunctionReturnVoid();
8218 }
8219 
8220 /* Undefining these here since they were redefined from their original definition above! No
8221  * other PETSc functions should be defined past this point, as it is impossible to recover the
8222  * original definitions */
8223 #undef PetscCall
8224 #undef SETERRQ
8225