xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 89a8e22d45cf8ef376b46ea5983ec24985e49039)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because they are used to determine
43      the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140    for communicators controlling multiple processes.  It is recommended that you call both of
141    the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146    Level: beginner
147 
148    Developer Note:
149    Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also
150    automatically switches over to using inodes when enough of them exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
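
/*
  A minimal usage sketch of the preallocation recommendation above (illustrative only; the
  per-row counts 5 and 2 and the global size N are placeholders, not suggested values):

    Mat A;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // takes effect when the communicator has one process
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); // takes effect otherwise
    ... MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
*/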
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from that of the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
382 a slightly higher hash table cost; without it, it is not scalable (each process
383 has an order-N integer array) but is fast to access.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
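
/*
  Concrete illustration of the mapping built above (not part of the implementation): if the
  off-diagonal block B of this process uses the global columns garray = {0, 7, 9}, then

      colmap[0] = 1, colmap[7] = 2, colmap[9] = 3

  (stored as a dense array of length N without PETSC_USE_CTABLE, or as a hash map keyed by the
  global column plus one with it), so a global column g translates to local column colmap[g]-1
  of B, and a result of -1 means g does not occur in B yet.
*/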
401 
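/*
  The two macros below insert (or add to) one value in the diagonal (A) respectively off-diagonal (B)
  SeqAIJ block of a row: the search window is first narrowed by a short binary search, then scanned
  linearly; if the column is not yet present and new nonzeros are allowed, the row is reallocated if
  needed and the later entries of the row are shifted up one slot to make room.
*/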
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure whether LogFlops will slow down the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some variables required in the macros */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
613                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
619         }
620       }
621     } else {
622       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
627         } else {
628           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         }
630       }
631     }
632   }
633   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we don't access them here */
634   PetscCall(MatSeqAIJRestoreArray(B, &ba));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
642 */
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
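/*
  Worked example for the CSR copy routine above and the one below (illustrative only): with
  cstart = 4 and cend = 8, a row whose global columns are {1, 4, 7, 9} is split into

      diagonal block A:     local columns {0, 3}   (4 - cstart and 7 - cstart)
      off-diagonal block B: global columns {1, 9}  (translated to local column numbers of B later, during assembly)

  giving ailen[row] = 2 and bilen[row] = 2 for that row.
*/
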
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
682     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
729   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
730   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
731 
732   PetscFunctionBegin;
733   for (i = 0; i < m; i++) {
734     if (idxm[i] < 0) continue; /* negative row */
735     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
736     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
737     row = idxm[i] - rstart;
738     for (j = 0; j < n; j++) {
739       if (idxn[j] < 0) continue; /* negative column */
740       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
741       if (idxn[j] >= cstart && idxn[j] < cend) {
742         col = idxn[j] - cstart;
743         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
744       } else {
745         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
746 #if defined(PETSC_USE_CTABLE)
747         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
748         col--;
749 #else
750         col = aij->colmap[idxn[j]] - 1;
751 #endif
752         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
753         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
754       }
755     }
756   }
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
775 {
776   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
777   PetscMPIInt  n;
778   PetscInt     i, j, rstart, ncols, flg;
779   PetscInt    *row, *col;
780   PetscBool    other_disassembled;
781   PetscScalar *val;
782 
783   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
784 
785   PetscFunctionBegin;
786   if (!aij->donotstash && !mat->nooffprocentries) {
787     while (1) {
788       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
789       if (!flg) break;
790 
791       for (i = 0; i < n;) {
792         /* Now identify the consecutive vals belonging to the same row */
793         for (j = i, rstart = row[j]; j < n; j++) {
794           if (row[j] != rstart) break;
795         }
796         if (j < n) ncols = j - i;
797         else ncols = n - i;
798         /* Now assemble all these values with a single function call */
799         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
800         i = j;
801       }
802     }
803     PetscCall(MatStashScatterEnd_Private(&mat->stash));
804   }
805 #if defined(PETSC_HAVE_DEVICE)
806   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
807   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
808   if (mat->boundtocpu) {
809     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
810     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
811   }
812 #endif
813   PetscCall(MatAssemblyBegin(aij->A, mode));
814   PetscCall(MatAssemblyEnd(aij->A, mode));
815 
816   /* determine if any processor has disassembled; if so, we must
817      also disassemble ourselves, in order that we may reassemble. */
818   /*
819      if the nonzero structure of the submatrix B cannot change, then we know that
820      no processor disassembled, and thus we can skip this step
821   */
822   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
823     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
824     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
825       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
829   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
830 #if defined(PETSC_HAVE_DEVICE)
831   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
832 #endif
833   PetscCall(MatAssemblyBegin(aij->B, mode));
834   PetscCall(MatAssemblyEnd(aij->B, mode));
835 
836   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
837 
838   aij->rowvalues = NULL;
839 
840   PetscCall(VecDestroy(&aij->diag));
841 
842   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
846   }
847 #if defined(PETSC_HAVE_DEVICE)
848   mat->offloadmask = PETSC_OFFLOAD_BOTH;
849 #endif
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
864 {
865   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
866   PetscInt   *lrows;
867   PetscInt    r, len;
868   PetscBool   cong;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
873   PetscCall(MatHasCongruentLayouts(A, &cong));
874   /* fix right-hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
880     PetscCall(VecGetArrayRead(x, &xx));
881     PetscCall(VecGetArray(b, &bb));
882     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
883     PetscCall(VecRestoreArrayRead(x, &xx));
884     PetscCall(VecRestoreArray(b, &bb));
885   }
886 
887   if (diag != 0.0 && cong) {
888     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
889     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
890   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertions */
891     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
892     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
893     PetscInt    nnwA, nnwB;
894     PetscBool   nnzA, nnzB;
895 
896     nnwA = aijA->nonew;
897     nnwB = aijB->nonew;
898     nnzA = aijA->keepnonzeropattern;
899     nnzB = aijB->keepnonzeropattern;
900     if (!nnzA) {
901       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
902       aijA->nonew = 0;
903     }
904     if (!nnzB) {
905       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
906       aijB->nonew = 0;
907     }
908     /* Must zero here before the next loop */
909     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
910     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
911     for (r = 0; r < len; ++r) {
912       const PetscInt row = lrows[r] + A->rmap->rstart;
913       if (row >= A->cmap->N) continue;
914       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
915     }
916     aijA->nonew = nnwA;
917     aijB->nonew = nnwB;
918   } else {
919     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
920     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
921   }
922   PetscCall(PetscFree(lrows));
923   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
928     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
929     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
935 {
936   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
937   PetscInt           n = A->rmap->n;
938   PetscInt           i, j, r, m, len = 0;
939   PetscInt          *lrows, *owners = A->rmap->range;
940   PetscMPIInt        p = 0;
941   PetscSFNode       *rrows;
942   PetscSF            sf;
943   const PetscScalar *xx;
944   PetscScalar       *bb, *mask, *aij_a;
945   Vec                xmask, lmask;
946   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
947   const PetscInt    *aj, *ii, *ridx;
948   PetscScalar       *aa;
949 
950   PetscFunctionBegin;
951   /* Create SF where leaves are input rows and roots are owned rows */
952   PetscCall(PetscMalloc1(n, &lrows));
953   for (r = 0; r < n; ++r) lrows[r] = -1;
954   PetscCall(PetscMalloc1(N, &rrows));
955   for (r = 0; r < N; ++r) {
956     const PetscInt idx = rows[r];
957     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
958     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
959       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
960     }
961     rrows[r].rank  = p;
962     rrows[r].index = rows[r] - owners[p];
963   }
964   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
965   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
966   /* Collect flags for rows to be zeroed */
967   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
969   PetscCall(PetscSFDestroy(&sf));
970   /* Compress and put in row numbers */
971   for (r = 0; r < n; ++r)
972     if (lrows[r] >= 0) lrows[len++] = r;
973   /* zero diagonal part of matrix */
974   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
975   /* handle off-diagonal part of matrix */
976   PetscCall(MatCreateVecs(A, &xmask, NULL));
977   PetscCall(VecDuplicate(l->lvec, &lmask));
978   PetscCall(VecGetArray(xmask, &bb));
979   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
980   PetscCall(VecRestoreArray(xmask, &bb));
981   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
983   PetscCall(VecDestroy(&xmask));
984   if (x && b) { /* this code is buggy when the row and column layouts don't match */
985     PetscBool cong;
986 
987     PetscCall(MatHasCongruentLayouts(A, &cong));
988     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
989     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
991     PetscCall(VecGetArrayRead(l->lvec, &xx));
992     PetscCall(VecGetArray(b, &bb));
993   }
994   PetscCall(VecGetArray(lmask, &mask));
995   /* remove zeroed rows of off-diagonal matrix */
996   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
997   ii = aij->i;
998   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
999   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1000   if (aij->compressedrow.use) {
1001     m    = aij->compressedrow.nrows;
1002     ii   = aij->compressedrow.i;
1003     ridx = aij->compressedrow.rindex;
1004     for (i = 0; i < m; i++) {
1005       n  = ii[i + 1] - ii[i];
1006       aj = aij->j + ii[i];
1007       aa = aij_a + ii[i];
1008 
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[*ridx] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017       ridx++;
1018     }
1019   } else { /* do not use compressed row format */
1020     m = l->B->rmap->n;
1021     for (i = 0; i < m; i++) {
1022       n  = ii[i + 1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij_a + ii[i];
1025       for (j = 0; j < n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           if (b) bb[i] -= *aa * xx[*aj];
1028           *aa = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033     }
1034   }
1035   if (x && b) {
1036     PetscCall(VecRestoreArray(b, &bb));
1037     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1038   }
1039   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1040   PetscCall(VecRestoreArray(lmask, &mask));
1041   PetscCall(VecDestroy(&lmask));
1042   PetscCall(PetscFree(lrows));
1043 
1044   /* only change matrix nonzero state if pattern was allowed to be changed */
1045   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1046     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1047     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1048   }
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
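/*
  Sketch of the algorithm below: y = a->A * x_local + a->B * x_ghost. The scatter that gathers the
  needed off-process (ghost) entries of xx into a->lvec is started first, the product with the local
  diagonal block a->A is computed while that communication is in flight, and the off-diagonal
  contribution a->B * a->lvec is then added in.
*/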
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1106 {
1107   MPI_Comm    comm;
1108   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1109   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1110   IS          Me, Notme;
1111   PetscInt    M, N, first, last, *notme, i;
1112   PetscBool   lf;
1113   PetscMPIInt size;
1114 
1115   PetscFunctionBegin;
1116   /* Easy test: symmetric diagonal block */
1117   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1118   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1119   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1120   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1121   PetscCallMPI(MPI_Comm_size(comm, &size));
1122   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1123 
1124   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1125   PetscCall(MatGetSize(Amat, &M, &N));
1126   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1127   PetscCall(PetscMalloc1(N - last + first, &notme));
1128   for (i = 0; i < first; i++) notme[i] = i;
1129   for (i = last; i < M; i++) notme[i - last + first] = i;
1130   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1131   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1132   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1133   Aoff = Aoffs[0];
1134   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1135   Boff = Boffs[0];
1136   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1137   PetscCall(MatDestroyMatrices(1, &Aoffs));
1138   PetscCall(MatDestroyMatrices(1, &Boffs));
1139   PetscCall(ISDestroy(&Me));
1140   PetscCall(ISDestroy(&Notme));
1141   PetscCall(PetscFree(notme));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
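/*
  On-disk layout produced by the routine below: a four-entry integer header
  {MAT_FILE_CLASSID, M, N, global number of nonzeros}, followed by the M row lengths, then all
  column indices (in global numbering), then all nonzero values in the same order.
*/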
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
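
/*
   A sketch of the binary stream produced above (the header is written once, then rowlens, colidxs,
   and matvals are concatenated across ranks in global row order by PetscViewerBinaryWriteAll()):

     header[4]     = {MAT_FILE_CLASSID, M, N, total number of nonzeros}
     rowlens[M]      nonzeros per global row (diagonal plus off-diagonal part)
     colidxs[nnz]    global column indices, each row sorted by increasing column
     matvals[nnz]    the matching numerical values

   with the block-size information written separately to the viewer's .info file.
*/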
1260 
1261 #include <petscdraw.h>
1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1265   PetscMPIInt       rank = aij->rank, size = aij->size;
1266   PetscBool         isdraw, iascii, isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1274   if (iascii) {
1275     PetscCall(PetscViewerGetFormat(viewer, &format));
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1278       PetscCall(PetscMalloc1(size, &nz));
1279       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1280       for (i = 0; i < size; i++) {
1281         nmax = PetscMax(nmax, nz[i]);
1282         nmin = PetscMin(nmin, nz[i]);
1283         navg += nz[i];
1284       }
1285       PetscCall(PetscFree(nz));
1286       navg = navg / size;
1287       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1288       PetscFunctionReturn(PETSC_SUCCESS);
1289     }
1290     PetscCall(PetscViewerGetFormat(viewer, &format));
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes = NULL;
1294 
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1296       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1297       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1298       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1299       if (!inodes) {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      info.memory));
1302       } else {
1303         PetscCall(
1304           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1305       }
1306       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1307       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1308       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(PetscViewerFlush(viewer));
1311       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1312       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1313       PetscCall(VecScatterView(aij->Mvctx, viewer));
1314       PetscFunctionReturn(PETSC_SUCCESS);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount, inodelimit, *inodes;
1317       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1318       if (inodes) {
1319         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1320       } else {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1322       }
1323       PetscFunctionReturn(PETSC_SUCCESS);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1330       PetscCall(MatView(aij->A, viewer));
1331     } else {
1332       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1333     }
1334     PetscFunctionReturn(PETSC_SUCCESS);
1335   } else if (iascii && size == 1) {
1336     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1337     PetscCall(MatView(aij->A, viewer));
1338     PetscFunctionReturn(PETSC_SUCCESS);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1343     PetscCall(PetscDrawIsNull(draw, &isnull));
1344     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow, iscol;
1350 
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1352     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1353     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1354     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1355     /*  The commented code uses MatCreateSubMatrices instead */
1356     /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1361     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1362     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1363     if (rank == 0) {
1364        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     PetscCall(MatDestroySubMatrices(1,&AA));
1369 */
1370     PetscCall(ISDestroy(&iscol));
1371     PetscCall(ISDestroy(&isrow));
1372     /*
1373        Every process must participate in drawing the matrix since the graphics waits are
1374        synchronized across all processes that share the PetscDraw object
1375     */
1376     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     if (rank == 0) {
1378       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1379       PetscCall(MatView_SeqAIJ(Av, sviewer));
1380     }
1381     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1382     PetscCall(MatDestroy(&A));
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1401 {
1402   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1403   Vec         bb1 = NULL;
1404   PetscBool   hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1409     PetscFunctionReturn(PETSC_SUCCESS);
1410   }
1411 
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1413 
1414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419 
1420     while (its--) {
1421       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1423 
1424       /* update rhs: bb1 = bb - B*x */
1425       PetscCall(VecScale(mat->lvec, -1.0));
1426       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1427 
1428       /* local sweep */
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1430     }
1431   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1432     if (flag & SOR_ZERO_INITIAL_GUESS) {
1433       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1434       its--;
1435     }
1436     while (its--) {
1437       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       PetscCall(VecScale(mat->lvec, -1.0));
1442       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1443 
1444       /* local sweep */
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1446     }
1447   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1450       its--;
1451     }
1452     while (its--) {
1453       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       PetscCall(VecScale(mat->lvec, -1.0));
1458       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1459 
1460       /* local sweep */
1461       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1462     }
1463   } else if (flag & SOR_EISENSTAT) {
1464     Vec xx1;
1465 
1466     PetscCall(VecDuplicate(bb, &xx1));
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1468 
1469     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1471     if (!mat->diag) {
1472       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1473       PetscCall(MatGetDiagonal(matin, mat->diag));
1474     }
1475     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1476     if (hasop) {
1477       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1478     } else {
1479       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1480     }
1481     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1482 
1483     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1484 
1485     /* local sweep */
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1487     PetscCall(VecAXPY(xx, 1.0, xx1));
1488     PetscCall(VecDestroy(&xx1));
1489   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1490 
1491   PetscCall(VecDestroy(&bb1));
1492 
1493   matin->factorerrortype = mat->A->factorerrortype;
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
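
/*
   A sketch of one local relaxation performed above, using the usual row-wise splitting of the
   parallel matrix into the diagonal block A and the off-diagonal block B on each rank:

     lvec = scatter of xx                      ghost values of the current iterate
     bb1  = bb - B*lvec                        VecScale(lvec,-1.0) followed by B->multadd
     xx   = SOR sweep of A applied to bb1      forward, backward, or symmetric, lits times

   repeated its times, with xx rescattered before every sweep.
*/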
1496 
1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1498 {
1499   Mat             aA, aB, Aperm;
1500   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1501   PetscScalar    *aa, *ba;
1502   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1503   PetscSF         rowsf, sf;
1504   IS              parcolp = NULL;
1505   PetscBool       done;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetLocalSize(A, &m, &n));
1509   PetscCall(ISGetIndices(rowp, &rwant));
1510   PetscCall(ISGetIndices(colp, &cwant));
1511   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1512 
1513   /* Invert row permutation to find out where my rows should go */
1514   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1515   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1516   PetscCall(PetscSFSetFromOptions(rowsf));
1517   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1518   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1520 
1521   /* Invert column permutation to find out where my columns should go */
1522   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1523   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1524   PetscCall(PetscSFSetFromOptions(sf));
1525   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1526   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1528   PetscCall(PetscSFDestroy(&sf));
1529 
1530   PetscCall(ISRestoreIndices(rowp, &rwant));
1531   PetscCall(ISRestoreIndices(colp, &cwant));
1532   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1533 
1534   /* Find out where my gcols should go */
1535   PetscCall(MatGetSize(aB, NULL, &ng));
1536   PetscCall(PetscMalloc1(ng, &gcdest));
1537   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1538   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1539   PetscCall(PetscSFSetFromOptions(sf));
1540   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1545   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1546   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1547   for (i = 0; i < m; i++) {
1548     PetscInt    row = rdest[i];
1549     PetscMPIInt rowner;
1550     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       PetscInt    col = cdest[aj[j]];
1553       PetscMPIInt cowner;
1554       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1555       if (rowner == cowner) dnnz[i]++;
1556       else onnz[i]++;
1557     }
1558     for (j = bi[i]; j < bi[i + 1]; j++) {
1559       PetscInt    col = gcdest[bj[j]];
1560       PetscMPIInt cowner;
1561       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1562       if (rowner == cowner) dnnz[i]++;
1563       else onnz[i]++;
1564     }
1565   }
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&rowsf));
1571 
1572   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1573   PetscCall(MatSeqAIJGetArray(aA, &aa));
1574   PetscCall(MatSeqAIJGetArray(aB, &ba));
1575   for (i = 0; i < m; i++) {
1576     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1577     PetscInt  j0, rowlen;
1578     rowlen = ai[i + 1] - ai[i];
1579     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than the number of rows m, so insert the row in batches of at most m entries */
1580       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1581       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1582     }
1583     rowlen = bi[i + 1] - bi[i];
1584     for (j0 = j = 0; j < rowlen; j0 = j) {
1585       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1586       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1587     }
1588   }
1589   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1592   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1593   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1594   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1595   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1596   PetscCall(PetscFree3(work, rdest, cdest));
1597   PetscCall(PetscFree(gcdest));
1598   if (parcolp) PetscCall(ISDestroy(&colp));
1599   *B = Aperm;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
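
/*
   A sketch of what the SF reductions above compute: rowsf has one leaf per local row i pointing at
   global row rwant[i], so reducing work[i] = rstart + i into the roots leaves in rdest[i] the row
   of the permuted matrix that local row i of A is written to; cdest[] and gcdest[] are obtained
   the same way for the owned and ghost columns, respectively.
*/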
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1666 {
1667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A, 1);
1680     PetscCall(MatSetOption(a->A, op, flg));
1681     PetscCall(MatSetOption(a->B, op, flg));
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A, 1);
1685     a->roworiented = flg;
1686 
1687     PetscCall(MatSetOption(a->A, op, flg));
1688     PetscCall(MatSetOption(a->B, op, flg));
1689     break;
1690   case MAT_IGNORE_OFF_PROC_ENTRIES:
1691     a->donotstash = flg;
1692     break;
1693   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1694   case MAT_SPD:
1695   case MAT_SYMMETRIC:
1696   case MAT_STRUCTURALLY_SYMMETRIC:
1697   case MAT_HERMITIAN:
1698   case MAT_SYMMETRY_ETERNAL:
1699   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1700   case MAT_SPD_ETERNAL:
1701     /* if the diagonal block is square it inherits some of the properties above */
1702     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1703     break;
1704   case MAT_SUBMAT_SINGLEIS:
1705     A->submat_singleis = flg;
1706     break;
1707   default:
1708     break;
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1714 {
1715   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1716   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1717   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1718   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1719   PetscInt    *cmap, *idx_p;
1720 
1721   PetscFunctionBegin;
1722   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1723   mat->getrowactive = PETSC_TRUE;
1724 
1725   if (!mat->rowvalues && (idx || v)) {
1726     /*
1727         allocate enough space to hold information from the longest row.
1728     */
1729     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1730     PetscInt    max = 1, tmp;
1731     for (i = 0; i < matin->rmap->n; i++) {
1732       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1733       if (max < tmp) max = tmp;
1734     }
1735     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1736   }
1737 
1738   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1739   lrow = row - rstart;
1740 
1741   pvA = &vworkA;
1742   pcA = &cworkA;
1743   pvB = &vworkB;
1744   pcB = &cworkB;
1745   if (!v) {
1746     pvA = NULL;
1747     pvB = NULL;
1748   }
1749   if (!idx) {
1750     pcA = NULL;
1751     if (!v) pcB = NULL;
1752   }
1753   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1754   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1755   nztot = nzA + nzB;
1756 
1757   cmap = mat->garray;
1758   if (v || idx) {
1759     if (nztot) {
1760       /* Sort by increasing column numbers, assuming A and B already sorted */
1761       PetscInt imark = -1;
1762       if (v) {
1763         *v = v_p = mat->rowvalues;
1764         for (i = 0; i < nzB; i++) {
1765           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766           else break;
1767         }
1768         imark = i;
1769         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1770         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1771       }
1772       if (idx) {
1773         *idx = idx_p = mat->rowindices;
1774         if (imark > -1) {
1775           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1776         } else {
1777           for (i = 0; i < nzB; i++) {
1778             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1779             else break;
1780           }
1781           imark = i;
1782         }
1783         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1784         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1785       }
1786     } else {
1787       if (idx) *idx = NULL;
1788       if (v) *v = NULL;
1789     }
1790   }
1791   *nz = nztot;
1792   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1793   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1808 {
1809   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1810   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1811   PetscInt         i, j, cstart = mat->cmap->rstart;
1812   PetscReal        sum = 0.0;
1813   const MatScalar *v, *amata, *bmata;
1814 
1815   PetscFunctionBegin;
1816   if (aij->size == 1) {
1817     PetscCall(MatNorm(aij->A, type, norm));
1818   } else {
1819     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1820     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1821     if (type == NORM_FROBENIUS) {
1822       v = amata;
1823       for (i = 0; i < amat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v) * (*v));
1825         v++;
1826       }
1827       v = bmata;
1828       for (i = 0; i < bmat->nz; i++) {
1829         sum += PetscRealPart(PetscConj(*v) * (*v));
1830         v++;
1831       }
1832       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1833       *norm = PetscSqrtReal(*norm);
1834       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1835     } else if (type == NORM_1) { /* max column norm */
1836       PetscReal *tmp;
1837       PetscInt  *jj, *garray = aij->garray;
1838       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1839       *norm = 0.0;
1840       v     = amata;
1841       jj    = amat->j;
1842       for (j = 0; j < amat->nz; j++) {
1843         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1844         v++;
1845       }
1846       v  = bmata;
1847       jj = bmat->j;
1848       for (j = 0; j < bmat->nz; j++) {
1849         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1850         v++;
1851       }
1852       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1853       for (j = 0; j < mat->cmap->N; j++) {
1854         if (tmp[j] > *norm) *norm = tmp[j];
1855       }
1856       PetscCall(PetscFree(tmp));
1857       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1858     } else if (type == NORM_INFINITY) { /* max row norm */
1859       PetscReal ntemp = 0.0;
1860       for (j = 0; j < aij->A->rmap->n; j++) {
1861         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1862         sum = 0.0;
1863         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1864           sum += PetscAbsScalar(*v);
1865           v++;
1866         }
1867         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1868         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1869           sum += PetscAbsScalar(*v);
1870           v++;
1871         }
1872         if (sum > ntemp) ntemp = sum;
1873       }
1874       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1875       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1876     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1878     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1879   }
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
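
/*
   The parallel cases above compute, for the global matrix,

     NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )    local sums of squares, then an MPI sum
     NORM_1:          max_j sum_i |a_ij|         per-column absolute sums accumulated in tmp[]
     NORM_INFINITY:   max_i sum_j |a_ij|         per-row absolute sums, then an MPI max
*/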
1882 
1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1884 {
1885   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1886   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1887   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1888   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1889   Mat              B, A_diag, *B_diag;
1890   const MatScalar *pbv, *bv;
1891 
1892   PetscFunctionBegin;
1893   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1894   ma = A->rmap->n;
1895   na = A->cmap->n;
1896   mb = a->B->rmap->n;
1897   nb = a->B->cmap->n;
1898   ai = Aloc->i;
1899   aj = Aloc->j;
1900   bi = Bloc->i;
1901   bj = Bloc->j;
1902   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1903     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1904     PetscSFNode         *oloc;
1905     PETSC_UNUSED PetscSF sf;
1906 
1907     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1908     /* compute d_nnz for preallocation */
1909     PetscCall(PetscArrayzero(d_nnz, na));
1910     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1911     /* compute local off-diagonal contributions */
1912     PetscCall(PetscArrayzero(g_nnz, nb));
1913     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1914     /* map those to global */
1915     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1916     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1917     PetscCall(PetscSFSetFromOptions(sf));
1918     PetscCall(PetscArrayzero(o_nnz, na));
1919     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1921     PetscCall(PetscSFDestroy(&sf));
1922 
1923     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1924     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1925     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1926     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1927     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1928     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1929   } else {
1930     B = *matout;
1931     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1932   }
1933 
1934   b           = (Mat_MPIAIJ *)B->data;
1935   A_diag      = a->A;
1936   B_diag      = &b->A;
1937   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1938   A_diag_ncol = A_diag->cmap->N;
1939   B_diag_ilen = sub_B_diag->ilen;
1940   B_diag_i    = sub_B_diag->i;
1941 
1942   /* Set ilen for diagonal of B */
1943   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1944 
1945   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1946   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1947   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1948   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1949 
1950   /* copy over the B part */
1951   PetscCall(PetscMalloc1(bi[mb], &cols));
1952   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1953   pbv = bv;
1954   row = A->rmap->rstart;
1955   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1956   cols_tmp = cols;
1957   for (i = 0; i < mb; i++) {
1958     ncol = bi[i + 1] - bi[i];
1959     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1960     row++;
1961     if (pbv) pbv += ncol;
1962     if (cols_tmp) cols_tmp += ncol;
1963   }
1964   PetscCall(PetscFree(cols));
1965   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1966 
1967   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1968   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1970     *matout = B;
1971   } else {
1972     PetscCall(MatHeaderMerge(A, &B));
1973   }
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1978 {
1979   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1980   Mat         a = aij->A, b = aij->B;
1981   PetscInt    s1, s2, s3;
1982 
1983   PetscFunctionBegin;
1984   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1985   if (rr) {
1986     PetscCall(VecGetLocalSize(rr, &s1));
1987     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1990   }
1991   if (ll) {
1992     PetscCall(VecGetLocalSize(ll, &s1));
1993     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1994     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1995   }
1996   /* scale  the diagonal block */
1997   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2003   }
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(MatSetUnfactored(a->A));
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2017 {
2018   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2019   Mat         a, b, c, d;
2020   PetscBool   flg;
2021 
2022   PetscFunctionBegin;
2023   a = matA->A;
2024   b = matA->B;
2025   c = matB->A;
2026   d = matB->B;
2027 
2028   PetscCall(MatEqual(a, c, &flg));
2029   if (flg) PetscCall(MatEqual(b, d, &flg));
2030   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2038 
2039   PetscFunctionBegin;
2040   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2041   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2042     /* because of the column compression in the off-processor part of the matrix a->B,
2043        the number of columns in a->B and b->B may be different, hence we cannot call
2044        the MatCopy() directly on the two parts. If need be, we can provide a more
2045        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2046        then copying the submatrices */
2047     PetscCall(MatCopy_Basic(A, B, str));
2048   } else {
2049     PetscCall(MatCopy(a->A, b->A, str));
2050     PetscCall(MatCopy(a->B, b->B, str));
2051   }
2052   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 /*
2057    Computes the number of nonzeros per row needed for preallocation when X and Y
2058    have different nonzero structure.
2059 */
2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2061 {
2062   PetscInt i, j, k, nzx, nzy;
2063 
2064   PetscFunctionBegin;
2065   /* Set the number of nonzeros in the new matrix */
2066   for (i = 0; i < m; i++) {
2067     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2068     nzx    = xi[i + 1] - xi[i];
2069     nzy    = yi[i + 1] - yi[i];
2070     nnz[i] = 0;
2071     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2072       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2073       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2074       nnz[i]++;
2075     }
2076     for (; k < nzy; k++) nnz[i]++;
2077   }
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
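
/*
   Per row this is a merge of the two column lists, sorted in the global numbering given by xltog
   and yltog, that counts the size of their union. For example, if row i of X has global columns
   {1, 4, 7} and row i of Y has {2, 4}, the union {1, 2, 4, 7} gives nnz[i] = 4.
*/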
2080 
2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2083 {
2084   PetscInt    m = Y->rmap->N;
2085   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2086   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2094 {
2095   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2096 
2097   PetscFunctionBegin;
2098   if (str == SAME_NONZERO_PATTERN) {
2099     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2100     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2101   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2102     PetscCall(MatAXPY_Basic(Y, a, X, str));
2103   } else {
2104     Mat       B;
2105     PetscInt *nnz_d, *nnz_o;
2106 
2107     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2108     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2109     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2110     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2111     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2112     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2113     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2114     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2115     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2116     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2117     PetscCall(MatHeaderMerge(Y, &B));
2118     PetscCall(PetscFree(nnz_d));
2119     PetscCall(PetscFree(nnz_o));
2120   }
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128   PetscFunctionBegin;
2129   if (PetscDefined(USE_COMPLEX)) {
2130     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2131 
2132     PetscCall(MatConjugate_SeqAIJ(aij->A));
2133     PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   }
2135   PetscFunctionReturn(PETSC_SUCCESS);
2136 }
2137 
2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2141 
2142   PetscFunctionBegin;
2143   PetscCall(MatRealPart(a->A));
2144   PetscCall(MatRealPart(a->B));
2145   PetscFunctionReturn(PETSC_SUCCESS);
2146 }
2147 
2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2151 
2152   PetscFunctionBegin;
2153   PetscCall(MatImaginaryPart(a->A));
2154   PetscCall(MatImaginaryPart(a->B));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2159 {
2160   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2161   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2162   PetscScalar       *vv;
2163   Vec                vB, vA;
2164   const PetscScalar *va, *vb;
2165 
2166   PetscFunctionBegin;
2167   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2168   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2169 
2170   PetscCall(VecGetArrayRead(vA, &va));
2171   if (idx) {
2172     for (i = 0; i < m; i++) {
2173       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2174     }
2175   }
2176 
2177   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2178   PetscCall(PetscMalloc1(m, &idxb));
2179   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2180 
2181   PetscCall(VecGetArrayWrite(v, &vv));
2182   PetscCall(VecGetArrayRead(vB, &vb));
2183   for (i = 0; i < m; i++) {
2184     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2185       vv[i] = vb[i];
2186       if (idx) idx[i] = a->garray[idxb[i]];
2187     } else {
2188       vv[i] = va[i];
2189       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2190     }
2191   }
2192   PetscCall(VecRestoreArrayWrite(v, &vv));
2193   PetscCall(VecRestoreArrayRead(vA, &va));
2194   PetscCall(VecRestoreArrayRead(vB, &vb));
2195   PetscCall(PetscFree(idxb));
2196   PetscCall(VecDestroy(&vA));
2197   PetscCall(VecDestroy(&vB));
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2202 {
2203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2219 {
2220   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2221   PetscInt           m = A->rmap->n, n = A->cmap->n;
2222   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2223   PetscInt          *cmap = mat->garray;
2224   PetscInt          *diagIdx, *offdiagIdx;
2225   Vec                diagV, offdiagV;
2226   PetscScalar       *a, *diagA, *offdiagA;
2227   const PetscScalar *ba, *bav;
2228   PetscInt           r, j, col, ncols, *bi, *bj;
2229   Mat                B = mat->B;
2230   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2231 
2232   PetscFunctionBegin;
2233   /* When one process holds the entire matrix and the other processes have no entries */
2234   if (A->cmap->N == n) {
2235     PetscCall(VecGetArrayWrite(v, &diagA));
2236     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2237     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2238     PetscCall(VecDestroy(&diagV));
2239     PetscCall(VecRestoreArrayWrite(v, &diagA));
2240     PetscFunctionReturn(PETSC_SUCCESS);
2241   } else if (n == 0) {
2242     if (m) {
2243       PetscCall(VecGetArrayWrite(v, &a));
2244       for (r = 0; r < m; r++) {
2245         a[r] = 0.0;
2246         if (idx) idx[r] = -1;
2247       }
2248       PetscCall(VecRestoreArrayWrite(v, &a));
2249     }
2250     PetscFunctionReturn(PETSC_SUCCESS);
2251   }
2252 
2253   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2255   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2256   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2257 
2258   /* Get offdiagIdx[] for implicit 0.0 */
2259   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2260   ba = bav;
2261   bi = b->i;
2262   bj = b->j;
2263   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2264   for (r = 0; r < m; r++) {
2265     ncols = bi[r + 1] - bi[r];
2266     if (ncols == A->cmap->N - n) { /* Brow is dense */
2267       offdiagA[r]   = *ba;
2268       offdiagIdx[r] = cmap[0];
2269     } else { /* Brow is sparse so it contains an implicit 0.0, hence the off-diagonal minimum in absolute value is 0.0 */
2270       offdiagA[r] = 0.0;
2271 
2272       /* Find first hole in the cmap */
2273       for (j = 0; j < ncols; j++) {
2274         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2275         if (col > j && j < cstart) {
2276           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2277           break;
2278         } else if (col > j + n && j >= cstart) {
2279           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2280           break;
2281         }
2282       }
2283       if (j == ncols && ncols < A->cmap->N - n) {
2284         /* a hole is outside compressed Bcols */
2285         if (ncols == 0) {
2286           if (cstart) {
2287             offdiagIdx[r] = 0;
2288           } else offdiagIdx[r] = cend;
2289         } else { /* ncols > 0 */
2290           offdiagIdx[r] = cmap[ncols - 1] + 1;
2291           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2292         }
2293       }
2294     }
2295 
2296     for (j = 0; j < ncols; j++) {
2297       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2298         offdiagA[r]   = *ba;
2299         offdiagIdx[r] = cmap[*bj];
2300       }
2301       ba++;
2302       bj++;
2303     }
2304   }
2305 
2306   PetscCall(VecGetArrayWrite(v, &a));
2307   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2308   for (r = 0; r < m; ++r) {
2309     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2310       a[r] = diagA[r];
2311       if (idx) idx[r] = cstart + diagIdx[r];
2312     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2313       a[r] = diagA[r];
2314       if (idx) {
2315         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2316           idx[r] = cstart + diagIdx[r];
2317         } else idx[r] = offdiagIdx[r];
2318       }
2319     } else {
2320       a[r] = offdiagA[r];
2321       if (idx) idx[r] = offdiagIdx[r];
2322     }
2323   }
2324   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2325   PetscCall(VecRestoreArrayWrite(v, &a));
2326   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2327   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2328   PetscCall(VecDestroy(&diagV));
2329   PetscCall(VecDestroy(&offdiagV));
2330   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2331   PetscFunctionReturn(PETSC_SUCCESS);
2332 }
2333 
2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2335 {
2336   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2337   PetscInt           m = A->rmap->n, n = A->cmap->n;
2338   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2339   PetscInt          *cmap = mat->garray;
2340   PetscInt          *diagIdx, *offdiagIdx;
2341   Vec                diagV, offdiagV;
2342   PetscScalar       *a, *diagA, *offdiagA;
2343   const PetscScalar *ba, *bav;
2344   PetscInt           r, j, col, ncols, *bi, *bj;
2345   Mat                B = mat->B;
2346   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2347 
2348   PetscFunctionBegin;
2349   /* When one process holds the entire matrix and the other processes have no entries */
2350   if (A->cmap->N == n) {
2351     PetscCall(VecGetArrayWrite(v, &diagA));
2352     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2353     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2354     PetscCall(VecDestroy(&diagV));
2355     PetscCall(VecRestoreArrayWrite(v, &diagA));
2356     PetscFunctionReturn(PETSC_SUCCESS);
2357   } else if (n == 0) {
2358     if (m) {
2359       PetscCall(VecGetArrayWrite(v, &a));
2360       for (r = 0; r < m; r++) {
2361         a[r] = PETSC_MAX_REAL;
2362         if (idx) idx[r] = -1;
2363       }
2364       PetscCall(VecRestoreArrayWrite(v, &a));
2365     }
2366     PetscFunctionReturn(PETSC_SUCCESS);
2367   }
2368 
2369   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2371   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2372   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2373 
2374   /* Get offdiagIdx[] for implicit 0.0 */
2375   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2376   ba = bav;
2377   bi = b->i;
2378   bj = b->j;
2379   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2380   for (r = 0; r < m; r++) {
2381     ncols = bi[r + 1] - bi[r];
2382     if (ncols == A->cmap->N - n) { /* Brow is dense */
2383       offdiagA[r]   = *ba;
2384       offdiagIdx[r] = cmap[0];
2385     } else { /* Brow is sparse so it contains an implicit 0.0, hence the off-diagonal minimum is 0.0 or lower */
2386       offdiagA[r] = 0.0;
2387 
2388       /* Find first hole in the cmap */
2389       for (j = 0; j < ncols; j++) {
2390         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2391         if (col > j && j < cstart) {
2392           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2393           break;
2394         } else if (col > j + n && j >= cstart) {
2395           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2396           break;
2397         }
2398       }
2399       if (j == ncols && ncols < A->cmap->N - n) {
2400         /* a hole is outside compressed Bcols */
2401         if (ncols == 0) {
2402           if (cstart) {
2403             offdiagIdx[r] = 0;
2404           } else offdiagIdx[r] = cend;
2405         } else { /* ncols > 0 */
2406           offdiagIdx[r] = cmap[ncols - 1] + 1;
2407           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2408         }
2409       }
2410     }
2411 
2412     for (j = 0; j < ncols; j++) {
2413       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2414         offdiagA[r]   = *ba;
2415         offdiagIdx[r] = cmap[*bj];
2416       }
2417       ba++;
2418       bj++;
2419     }
2420   }
2421 
2422   PetscCall(VecGetArrayWrite(v, &a));
2423   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2424   for (r = 0; r < m; ++r) {
2425     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2426       a[r] = diagA[r];
2427       if (idx) idx[r] = cstart + diagIdx[r];
2428     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2429       a[r] = diagA[r];
2430       if (idx) {
2431         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2432           idx[r] = cstart + diagIdx[r];
2433         } else idx[r] = offdiagIdx[r];
2434       }
2435     } else {
2436       a[r] = offdiagA[r];
2437       if (idx) idx[r] = offdiagIdx[r];
2438     }
2439   }
2440   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2441   PetscCall(VecRestoreArrayWrite(v, &a));
2442   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2443   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2444   PetscCall(VecDestroy(&diagV));
2445   PetscCall(VecDestroy(&offdiagV));
2446   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2447   PetscFunctionReturn(PETSC_SUCCESS);
2448 }
2449 
2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2451 {
2452   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2453   PetscInt           m = A->rmap->n, n = A->cmap->n;
2454   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2455   PetscInt          *cmap = mat->garray;
2456   PetscInt          *diagIdx, *offdiagIdx;
2457   Vec                diagV, offdiagV;
2458   PetscScalar       *a, *diagA, *offdiagA;
2459   const PetscScalar *ba, *bav;
2460   PetscInt           r, j, col, ncols, *bi, *bj;
2461   Mat                B = mat->B;
2462   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2463 
2464   PetscFunctionBegin;
2465   /* When one process holds the entire matrix and the other processes have no entries */
2466   if (A->cmap->N == n) {
2467     PetscCall(VecGetArrayWrite(v, &diagA));
2468     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2469     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2470     PetscCall(VecDestroy(&diagV));
2471     PetscCall(VecRestoreArrayWrite(v, &diagA));
2472     PetscFunctionReturn(PETSC_SUCCESS);
2473   } else if (n == 0) {
2474     if (m) {
2475       PetscCall(VecGetArrayWrite(v, &a));
2476       for (r = 0; r < m; r++) {
2477         a[r] = PETSC_MIN_REAL;
2478         if (idx) idx[r] = -1;
2479       }
2480       PetscCall(VecRestoreArrayWrite(v, &a));
2481     }
2482     PetscFunctionReturn(PETSC_SUCCESS);
2483   }
2484 
2485   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2487   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2488   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2492   ba = bav;
2493   bi = b->i;
2494   bj = b->j;
2495   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2496   for (r = 0; r < m; r++) {
2497     ncols = bi[r + 1] - bi[r];
2498     if (ncols == A->cmap->N - n) { /* Brow is dense */
2499       offdiagA[r]   = *ba;
2500       offdiagIdx[r] = cmap[0];
2501     } else { /* Brow is sparse so it contains an implicit 0.0, hence the off-diagonal maximum is 0.0 or higher */
2502       offdiagA[r] = 0.0;
2503 
2504       /* Find first hole in the cmap */
2505       for (j = 0; j < ncols; j++) {
2506         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2507         if (col > j && j < cstart) {
2508           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2509           break;
2510         } else if (col > j + n && j >= cstart) {
2511           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2512           break;
2513         }
2514       }
2515       if (j == ncols && ncols < A->cmap->N - n) {
2516         /* a hole is outside compressed Bcols */
2517         if (ncols == 0) {
2518           if (cstart) {
2519             offdiagIdx[r] = 0;
2520           } else offdiagIdx[r] = cend;
2521         } else { /* ncols > 0 */
2522           offdiagIdx[r] = cmap[ncols - 1] + 1;
2523           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2524         }
2525       }
2526     }
2527 
2528     for (j = 0; j < ncols; j++) {
2529       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2530         offdiagA[r]   = *ba;
2531         offdiagIdx[r] = cmap[*bj];
2532       }
2533       ba++;
2534       bj++;
2535     }
2536   }
2537 
2538   PetscCall(VecGetArrayWrite(v, &a));
2539   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2540   for (r = 0; r < m; ++r) {
2541     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2542       a[r] = diagA[r];
2543       if (idx) idx[r] = cstart + diagIdx[r];
2544     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2545       a[r] = diagA[r];
2546       if (idx) {
2547         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2548           idx[r] = cstart + diagIdx[r];
2549         } else idx[r] = offdiagIdx[r];
2550       }
2551     } else {
2552       a[r] = offdiagA[r];
2553       if (idx) idx[r] = offdiagIdx[r];
2554     }
2555   }
2556   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2557   PetscCall(VecRestoreArrayWrite(v, &a));
2558   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2559   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2560   PetscCall(VecDestroy(&diagV));
2561   PetscCall(VecDestroy(&offdiagV));
2562   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
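/* A minimal usage sketch (editorial, not part of the manual page above), assuming A is an assembled MATMPIAIJ:

     PetscCount nz;

     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
*/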
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Sets whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` indicates that the scalable algorithm should be used (the default is not to use it)
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
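/* A minimal usage sketch (editorial); the same switch is normally flipped from the options database with
   -mat_increase_overlap_scalable (see MatSetFromOptions_MPIAIJ() below), but it can also be set directly:

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
*/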
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2654 {
2655   PetscFunctionBegin;
2656   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2725                                        MatGetRow_MPIAIJ,
2726                                        MatRestoreRow_MPIAIJ,
2727                                        MatMult_MPIAIJ,
2728                                        /* 4*/ MatMultAdd_MPIAIJ,
2729                                        MatMultTranspose_MPIAIJ,
2730                                        MatMultTransposeAdd_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        /*10*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        MatSOR_MPIAIJ,
2738                                        MatTranspose_MPIAIJ,
2739                                        /*15*/ MatGetInfo_MPIAIJ,
2740                                        MatEqual_MPIAIJ,
2741                                        MatGetDiagonal_MPIAIJ,
2742                                        MatDiagonalScale_MPIAIJ,
2743                                        MatNorm_MPIAIJ,
2744                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2745                                        MatAssemblyEnd_MPIAIJ,
2746                                        MatSetOption_MPIAIJ,
2747                                        MatZeroEntries_MPIAIJ,
2748                                        /*24*/ MatZeroRows_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        /*29*/ MatSetUp_MPI_Hash,
2754                                        NULL,
2755                                        NULL,
2756                                        MatGetDiagonalBlock_MPIAIJ,
2757                                        NULL,
2758                                        /*34*/ MatDuplicate_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        /*39*/ MatAXPY_MPIAIJ,
2764                                        MatCreateSubMatrices_MPIAIJ,
2765                                        MatIncreaseOverlap_MPIAIJ,
2766                                        MatGetValues_MPIAIJ,
2767                                        MatCopy_MPIAIJ,
2768                                        /*44*/ MatGetRowMax_MPIAIJ,
2769                                        MatScale_MPIAIJ,
2770                                        MatShift_MPIAIJ,
2771                                        MatDiagonalSet_MPIAIJ,
2772                                        MatZeroRowsColumns_MPIAIJ,
2773                                        /*49*/ MatSetRandom_MPIAIJ,
2774                                        MatGetRowIJ_MPIAIJ,
2775                                        MatRestoreRowIJ_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2779                                        NULL,
2780                                        MatSetUnfactored_MPIAIJ,
2781                                        MatPermute_MPIAIJ,
2782                                        NULL,
2783                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2784                                        MatDestroy_MPIAIJ,
2785                                        MatView_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        /*64*/ NULL,
2789                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2794                                        MatGetRowMinAbs_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        /*75*/ MatFDColoringApply_AIJ,
2800                                        MatSetFromOptions_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        MatFindZeroDiagonals_MPIAIJ,
2804                                        /*80*/ NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*83*/ MatLoad_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        /*89*/ NULL,
2814                                        NULL,
2815                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        MatBindToCPU_MPIAIJ,
2823                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        MatConjugate_MPIAIJ,
2827                                        NULL,
2828                                        /*104*/ MatSetValuesRow_MPIAIJ,
2829                                        MatRealPart_MPIAIJ,
2830                                        MatImaginaryPart_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        /*109*/ NULL,
2834                                        NULL,
2835                                        MatGetRowMin_MPIAIJ,
2836                                        NULL,
2837                                        MatMissingDiagonal_MPIAIJ,
2838                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2839                                        NULL,
2840                                        MatGetGhosts_MPIAIJ,
2841                                        NULL,
2842                                        NULL,
2843                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                        MatGetMultiProcBlock_MPIAIJ,
2848                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2849                                        MatGetColumnReductions_MPIAIJ,
2850                                        MatInvertBlockDiagonal_MPIAIJ,
2851                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2852                                        MatCreateSubMatricesMPI_MPIAIJ,
2853                                        /*129*/ NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2857                                        NULL,
2858                                        /*134*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        NULL,
2863                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2864                                        NULL,
2865                                        NULL,
2866                                        MatFDColoringSetUp_MPIXAIJ,
2867                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2868                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2869                                        /*145*/ NULL,
2870                                        NULL,
2871                                        NULL,
2872                                        MatCreateGraph_Simple_AIJ,
2873                                        NULL,
2874                                        /*150*/ NULL,
2875                                        MatEliminateZeros_MPIAIJ,
2876                                        MatGetRowSumAbs_MPIAIJ,
2877                                        NULL,
2878                                        NULL,
2879                                        /*155*/ NULL,
2880                                        MatCopyHashToXAIJ_MPI_Hash};
2881 
2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2883 {
2884   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2885 
2886   PetscFunctionBegin;
2887   PetscCall(MatStoreValues(aij->A));
2888   PetscCall(MatStoreValues(aij->B));
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2893 {
2894   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2895 
2896   PetscFunctionBegin;
2897   PetscCall(MatRetrieveValues(aij->A));
2898   PetscCall(MatRetrieveValues(aij->B));
2899   PetscFunctionReturn(PETSC_SUCCESS);
2900 }
2901 
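/* A minimal usage sketch (editorial) of the public interface implemented below; the counts 5 and 2 are
   illustrative upper bounds on the nonzeros per row in the diagonal and off-diagonal blocks, and NULL
   means no exact per-row counts are supplied:

     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/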
2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2903 {
2904   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2905   PetscMPIInt size;
2906 
2907   PetscFunctionBegin;
2908   if (B->hash_active) {
2909     B->ops[0]      = b->cops;
2910     B->hash_active = PETSC_FALSE;
2911   }
2912   PetscCall(PetscLayoutSetUp(B->rmap));
2913   PetscCall(PetscLayoutSetUp(B->cmap));
2914 
2915 #if defined(PETSC_USE_CTABLE)
2916   PetscCall(PetscHMapIDestroy(&b->colmap));
2917 #else
2918   PetscCall(PetscFree(b->colmap));
2919 #endif
2920   PetscCall(PetscFree(b->garray));
2921   PetscCall(VecDestroy(&b->lvec));
2922   PetscCall(VecScatterDestroy(&b->Mvctx));
2923 
2924   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2925 
2926   MatSeqXAIJGetOptions_Private(b->B);
2927   PetscCall(MatDestroy(&b->B));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2929   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2930   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2931   PetscCall(MatSetType(b->B, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->B);
2933 
2934   MatSeqXAIJGetOptions_Private(b->A);
2935   PetscCall(MatDestroy(&b->A));
2936   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2937   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2938   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2939   PetscCall(MatSetType(b->A, MATSEQAIJ));
2940   MatSeqXAIJRestoreOptions_Private(b->A);
2941 
2942   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2943   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2944   B->preallocated  = PETSC_TRUE;
2945   B->was_assembled = PETSC_FALSE;
2946   B->assembled     = PETSC_FALSE;
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
2949 
2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2953 
2954   PetscFunctionBegin;
2955   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2956   PetscCall(PetscLayoutSetUp(B->rmap));
2957   PetscCall(PetscLayoutSetUp(B->cmap));
2958   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2959   else {
2960 #if defined(PETSC_USE_CTABLE)
2961     PetscCall(PetscHMapIDestroy(&b->colmap));
2962 #else
2963     PetscCall(PetscFree(b->colmap));
2964 #endif
2965     PetscCall(PetscFree(b->garray));
2966     PetscCall(VecDestroy(&b->lvec));
2967   }
2968   PetscCall(VecScatterDestroy(&b->Mvctx));
2969 
2970   PetscCall(MatResetPreallocation(b->A));
2971   PetscCall(MatResetPreallocation(b->B));
2972   B->preallocated  = PETSC_TRUE;
2973   B->was_assembled = PETSC_FALSE;
2974   B->assembled     = PETSC_FALSE;
2975   PetscFunctionReturn(PETSC_SUCCESS);
2976 }
2977 
2978 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2979 {
2980   Mat         mat;
2981   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2982 
2983   PetscFunctionBegin;
2984   *newmat = NULL;
2985   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2986   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2987   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2988   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2989   a = (Mat_MPIAIJ *)mat->data;
2990 
2991   mat->factortype = matin->factortype;
2992   mat->assembled  = matin->assembled;
2993   mat->insertmode = NOT_SET_VALUES;
2994 
2995   a->size         = oldmat->size;
2996   a->rank         = oldmat->rank;
2997   a->donotstash   = oldmat->donotstash;
2998   a->roworiented  = oldmat->roworiented;
2999   a->rowindices   = NULL;
3000   a->rowvalues    = NULL;
3001   a->getrowactive = PETSC_FALSE;
3002 
3003   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3004   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3005   if (matin->hash_active) {
3006     PetscCall(MatSetUp(mat));
3007   } else {
3008     mat->preallocated = matin->preallocated;
3009     if (oldmat->colmap) {
3010 #if defined(PETSC_USE_CTABLE)
3011       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3012 #else
3013       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3014       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3015 #endif
3016     } else a->colmap = NULL;
3017     if (oldmat->garray) {
3018       PetscInt len;
3019       len = oldmat->B->cmap->n;
3020       PetscCall(PetscMalloc1(len + 1, &a->garray));
3021       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3022     } else a->garray = NULL;
3023 
3024     /* MatDuplicate() may be called with a non-assembled matrix;
3025       in fact, MatDuplicate() only requires the matrix to be preallocated.
3026       This may happen, for example, inside DMCreateMatrix_Shell() */
3027     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3028     if (oldmat->Mvctx) {
3029       a->Mvctx = oldmat->Mvctx;
3030       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3031     }
3032     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3033     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3034   }
3035   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3036   *newmat = mat;
3037   PetscFunctionReturn(PETSC_SUCCESS);
3038 }
3039 
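/* A minimal usage sketch (editorial) for loading a MATMPIAIJ matrix through the binary path below;
   "matrix.dat" is a hypothetical file name:

     Mat         A;
     PetscViewer viewer;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/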
3040 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3041 {
3042   PetscBool isbinary, ishdf5;
3043 
3044   PetscFunctionBegin;
3045   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3046   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3047   /* force binary viewer to load .info file if it has not yet done so */
3048   PetscCall(PetscViewerSetUp(viewer));
3049   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3050   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3051   if (isbinary) {
3052     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3053   } else if (ishdf5) {
3054 #if defined(PETSC_HAVE_HDF5)
3055     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3056 #else
3057     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3058 #endif
3059   } else {
3060     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3061   }
3062   PetscFunctionReturn(PETSC_SUCCESS);
3063 }
3064 
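/* Editorial note on the binary layout read below (inferred from the code): the file starts with a
   four-entry PetscInt header (MAT_FILE_CLASSID, M, N, nz), followed by the M row lengths, then the
   column indices, and finally the scalar values. */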
3065 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3066 {
3067   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3068   PetscInt    *rowidxs, *colidxs;
3069   PetscScalar *matvals;
3070 
3071   PetscFunctionBegin;
3072   PetscCall(PetscViewerSetUp(viewer));
3073 
3074   /* read in matrix header */
3075   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3076   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3077   M  = header[1];
3078   N  = header[2];
3079   nz = header[3];
3080   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3081   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3082   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3083 
3084   /* set block sizes from the viewer's .info file */
3085   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3086   /* set global sizes if not set already */
3087   if (mat->rmap->N < 0) mat->rmap->N = M;
3088   if (mat->cmap->N < 0) mat->cmap->N = N;
3089   PetscCall(PetscLayoutSetUp(mat->rmap));
3090   PetscCall(PetscLayoutSetUp(mat->cmap));
3091 
3092   /* check if the matrix sizes are correct */
3093   PetscCall(MatGetSize(mat, &rows, &cols));
3094   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3095 
3096   /* read in row lengths and build row indices */
3097   PetscCall(MatGetLocalSize(mat, &m, NULL));
3098   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3099   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3100   rowidxs[0] = 0;
3101   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3102   if (nz != PETSC_INT_MAX) {
3103     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3104     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3105   }
3106 
3107   /* read in column indices and matrix values */
3108   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3109   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3110   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3111   /* store matrix indices and values */
3112   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3113   PetscCall(PetscFree(rowidxs));
3114   PetscCall(PetscFree2(colidxs, matvals));
3115   PetscFunctionReturn(PETSC_SUCCESS);
3116 }
3117 
3118 /* Not scalable because of ISAllGather() unless getting all columns. */
3119 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3120 {
3121   IS          iscol_local;
3122   PetscBool   isstride;
3123   PetscMPIInt gisstride = 0;
3124 
3125   PetscFunctionBegin;
3126   /* check if we are grabbing all columns */
3127   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3128 
3129   if (isstride) {
3130     PetscInt start, len, mstart, mlen;
3131     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3132     PetscCall(ISGetLocalSize(iscol, &len));
3133     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3134     if (mstart == start && mlen - mstart == len) gisstride = 1;
3135   }
3136 
3137   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3138   if (gisstride) {
3139     PetscInt N;
3140     PetscCall(MatGetSize(mat, NULL, &N));
3141     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3142     PetscCall(ISSetIdentity(iscol_local));
3143     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3144   } else {
3145     PetscInt cbs;
3146     PetscCall(ISGetBlockSize(iscol, &cbs));
3147     PetscCall(ISAllGather(iscol, &iscol_local));
3148     PetscCall(ISSetBlockSize(iscol_local, cbs));
3149   }
3150 
3151   *isseq = iscol_local;
3152   PetscFunctionReturn(PETSC_SUCCESS);
3153 }
3154 
3155 /*
3156  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local with the global size of iscol
3157  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3158 
3159  Input Parameters:
3160 +   mat - matrix
3161 +   isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
3162            i.e., mat->rstart <= isrow[i] < mat->rend
3163 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3164            i.e., mat->cstart <= iscol[i] < mat->cend
3165 
3166  Output Parameters:
3167 +   isrow_d - sequential row index set for retrieving mat->A
3168 .   iscol_d - sequential column index set for retrieving mat->A
3169 .   iscol_o - sequential column index set for retrieving mat->B
3170 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3171  */
3172 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3173 {
3174   Vec             x, cmap;
3175   const PetscInt *is_idx;
3176   PetscScalar    *xarray, *cmaparray;
3177   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3178   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3179   Mat             B    = a->B;
3180   Vec             lvec = a->lvec, lcmap;
3181   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3182   MPI_Comm        comm;
3183   VecScatter      Mvctx = a->Mvctx;
3184 
3185   PetscFunctionBegin;
3186   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3187   PetscCall(ISGetLocalSize(iscol, &ncols));
3188 
3189   /* (1) iscol selects a subset of the columns of mat; build a full-length vector x with -1.0 in the unselected entries */
3190   PetscCall(MatCreateVecs(mat, &x, NULL));
3191   PetscCall(VecSet(x, -1.0));
3192   PetscCall(VecDuplicate(x, &cmap));
3193   PetscCall(VecSet(cmap, -1.0));
3194 
3195   /* Get start indices */
3196   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3197   isstart -= ncols;
3198   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3199 
3200   PetscCall(ISGetIndices(iscol, &is_idx));
3201   PetscCall(VecGetArray(x, &xarray));
3202   PetscCall(VecGetArray(cmap, &cmaparray));
3203   PetscCall(PetscMalloc1(ncols, &idx));
3204   for (i = 0; i < ncols; i++) {
3205     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3206     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3207     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3208   }
3209   PetscCall(VecRestoreArray(x, &xarray));
3210   PetscCall(VecRestoreArray(cmap, &cmaparray));
3211   PetscCall(ISRestoreIndices(iscol, &is_idx));
3212 
3213   /* Get iscol_d */
3214   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3215   PetscCall(ISGetBlockSize(iscol, &i));
3216   PetscCall(ISSetBlockSize(*iscol_d, i));
3217 
3218   /* Get isrow_d */
3219   PetscCall(ISGetLocalSize(isrow, &m));
3220   rstart = mat->rmap->rstart;
3221   PetscCall(PetscMalloc1(m, &idx));
3222   PetscCall(ISGetIndices(isrow, &is_idx));
3223   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3224   PetscCall(ISRestoreIndices(isrow, &is_idx));
3225 
3226   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3227   PetscCall(ISGetBlockSize(isrow, &i));
3228   PetscCall(ISSetBlockSize(*isrow_d, i));
3229 
3230   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3231   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3232   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3233 
3234   PetscCall(VecDuplicate(lvec, &lcmap));
3235 
3236   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3237   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3238 
3239   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3240   /* off-process column indices */
3241   count = 0;
3242   PetscCall(PetscMalloc1(Bn, &idx));
3243   PetscCall(PetscMalloc1(Bn, &cmap1));
3244 
3245   PetscCall(VecGetArray(lvec, &xarray));
3246   PetscCall(VecGetArray(lcmap, &cmaparray));
3247   for (i = 0; i < Bn; i++) {
3248     if (PetscRealPart(xarray[i]) > -1.0) {
3249       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3250       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3251       count++;
3252     }
3253   }
3254   PetscCall(VecRestoreArray(lvec, &xarray));
3255   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3256 
3257   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3258   /* cannot ensure iscol_o has same blocksize as iscol! */
3259 
3260   PetscCall(PetscFree(idx));
3261   *garray = cmap1;
3262 
3263   PetscCall(VecDestroy(&x));
3264   PetscCall(VecDestroy(&cmap));
3265   PetscCall(VecDestroy(&lcmap));
3266   PetscFunctionReturn(PETSC_SUCCESS);
3267 }
3268 
3269 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3270 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3271 {
3272   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3273   Mat         M = NULL;
3274   MPI_Comm    comm;
3275   IS          iscol_d, isrow_d, iscol_o;
3276   Mat         Asub = NULL, Bsub = NULL;
3277   PetscInt    n;
3278 
3279   PetscFunctionBegin;
3280   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3281 
3282   if (call == MAT_REUSE_MATRIX) {
3283     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3284     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3285     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3286 
3287     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3288     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3289 
3290     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3291     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3292 
3293     /* Update diagonal and off-diagonal portions of submat */
3294     asub = (Mat_MPIAIJ *)(*submat)->data;
3295     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3296     PetscCall(ISGetLocalSize(iscol_o, &n));
3297     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3298     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3299     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3300   } else { /* call == MAT_INITIAL_MATRIX */
3301   } else { /* call == MAT_INITIAL_MATRIX) */
3302     PetscInt *garray;
3303     PetscInt  BsubN;
3304 
3305     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3306     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3307 
3308     /* Create local submatrices Asub and Bsub */
3309     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3310     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3311 
3312     /* Create submatrix M */
3313     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3314 
3315     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3316     asub = (Mat_MPIAIJ *)M->data;
3317 
3318     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3319     n = asub->B->cmap->N;
3320     if (BsubN > n) {
3321       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3322       const PetscInt *idx;
3323       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3324       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3325 
3326       PetscCall(PetscMalloc1(n, &idx_new));
3327       j = 0;
3328       PetscCall(ISGetIndices(iscol_o, &idx));
3329       for (i = 0; i < n; i++) {
3330         if (j >= BsubN) break;
3331         while (subgarray[i] > garray[j]) j++;
3332 
3333         if (subgarray[i] == garray[j]) {
3334           idx_new[i] = idx[j++];
3335         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be less than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3336       }
3337       PetscCall(ISRestoreIndices(iscol_o, &idx));
3338 
3339       PetscCall(ISDestroy(&iscol_o));
3340       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3341 
3342     } else if (BsubN < n) {
3343       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3344     }
3345 
3346     PetscCall(PetscFree(garray));
3347     *submat = M;
3348 
3349     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3350     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3351     PetscCall(ISDestroy(&isrow_d));
3352 
3353     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3354     PetscCall(ISDestroy(&iscol_d));
3355 
3356     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3357     PetscCall(ISDestroy(&iscol_o));
3358   }
3359   PetscFunctionReturn(PETSC_SUCCESS);
3360 }
3361 
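/* A minimal usage sketch (editorial) of the public interface whose MPIAIJ implementation follows; isrow and
   iscol are hypothetical parallel index sets holding the global rows and columns each rank wants in the
   submatrix:

     Mat sub;

     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
     // ... after changing values in A, the same submatrix can be refilled in place:
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &sub));
*/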
3362 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3363 {
3364   IS        iscol_local = NULL, isrow_d;
3365   PetscInt  csize;
3366   PetscInt  n, i, j, start, end;
3367   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3368   MPI_Comm  comm;
3369 
3370   PetscFunctionBegin;
3371   /* If isrow has the same processor distribution as mat,
3372      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with the global size of iscol */
3373   if (call == MAT_REUSE_MATRIX) {
3374     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3375     if (isrow_d) {
3376       sameRowDist  = PETSC_TRUE;
3377       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3378     } else {
3379       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3380       if (iscol_local) {
3381         sameRowDist  = PETSC_TRUE;
3382         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3383       }
3384     }
3385   } else {
3386     /* Check if isrow has same processor distribution as mat */
3387     sameDist[0] = PETSC_FALSE;
3388     PetscCall(ISGetLocalSize(isrow, &n));
3389     if (!n) {
3390       sameDist[0] = PETSC_TRUE;
3391     } else {
3392       PetscCall(ISGetMinMax(isrow, &i, &j));
3393       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3394       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3395     }
3396 
3397     /* Check if iscol has same processor distribution as mat */
3398     sameDist[1] = PETSC_FALSE;
3399     PetscCall(ISGetLocalSize(iscol, &n));
3400     if (!n) {
3401       sameDist[1] = PETSC_TRUE;
3402     } else {
3403       PetscCall(ISGetMinMax(iscol, &i, &j));
3404       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3405       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3406     }
3407 
3408     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3409     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3410     sameRowDist = tsameDist[0];
3411   }
3412 
3413   if (sameRowDist) {
3414     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3415       /* isrow and iscol have same processor distribution as mat */
3416       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3417       PetscFunctionReturn(PETSC_SUCCESS);
3418     } else { /* sameRowDist */
3419       /* isrow has same processor distribution as mat */
3420       if (call == MAT_INITIAL_MATRIX) {
3421         PetscBool sorted;
3422         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3423         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3424         PetscCall(ISGetSize(iscol, &i));
3425         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3426 
3427         PetscCall(ISSorted(iscol_local, &sorted));
3428         if (sorted) {
3429           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3430           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3431           PetscFunctionReturn(PETSC_SUCCESS);
3432         }
3433       } else { /* call == MAT_REUSE_MATRIX */
3434         IS iscol_sub;
3435         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3436         if (iscol_sub) {
3437           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3438           PetscFunctionReturn(PETSC_SUCCESS);
3439         }
3440       }
3441     }
3442   }
3443 
3444   /* General case: iscol -> iscol_local which has global size of iscol */
3445   if (call == MAT_REUSE_MATRIX) {
3446     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3447     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3448   } else {
3449     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3450   }
3451 
3452   PetscCall(ISGetLocalSize(iscol, &csize));
3453   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3454 
3455   if (call == MAT_INITIAL_MATRIX) {
3456     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3457     PetscCall(ISDestroy(&iscol_local));
3458   }
3459   PetscFunctionReturn(PETSC_SUCCESS);
3460 }
3461 
3462 /*@C
3463   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3464   and "off-diagonal" parts of the matrix in CSR format.
3465 
3466   Collective
3467 
3468   Input Parameters:
3469 + comm   - MPI communicator
3470 . A      - "diagonal" portion of matrix
3471 . B      - "off-diagonal" portion of the matrix; it may have empty columns and will be destroyed by this routine
3472 - garray - global index of `B` columns
3473 
3474   Output Parameter:
3475 . mat - the matrix, with input `A` as its local diagonal matrix
3476 
3477   Level: advanced
3478 
3479   Notes:
3480   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3481 
3482   `A` becomes part of the output `mat` and `B` is destroyed by this routine, so the caller must not use `A` or `B` afterwards. (A usage sketch follows this manual page.)
3483 
3484 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3485 @*/
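/* A minimal usage sketch (editorial); Ad and Ao are hypothetical MATSEQAIJ matrices built by the caller and
   garray[] maps each (compressed) column of Ao to its global column index; Ad is taken over by C and Ao is
   destroyed, so neither may be used by the caller afterwards:

     Mat C;

     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Ad, Ao, garray, &C));
*/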
3486 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3487 {
3488   Mat_MPIAIJ        *maij;
3489   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3490   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3491   const PetscScalar *oa;
3492   Mat                Bnew;
3493   PetscInt           m, n, N;
3494   MatType            mpi_mat_type;
3495 
3496   PetscFunctionBegin;
3497   PetscCall(MatCreate(comm, mat));
3498   PetscCall(MatGetSize(A, &m, &n));
3499   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3500   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3501   /* the check below was removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3502   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3503 
3504   /* Get global columns of mat */
3505   PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3506 
3507   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3508   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3509   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3510   PetscCall(MatSetType(*mat, mpi_mat_type));
3511 
3512   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3513   maij = (Mat_MPIAIJ *)(*mat)->data;
3514 
3515   (*mat)->preallocated = PETSC_TRUE;
3516 
3517   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3518   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3519 
3520   /* Set A as diagonal portion of *mat */
3521   maij->A = A;
3522 
3523   nz = oi[m];
3524   for (i = 0; i < nz; i++) {
3525     col   = oj[i];
3526     oj[i] = garray[col];
3527   }
3528 
3529   /* Set Bnew as off-diagonal portion of *mat */
3530   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3531   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3532   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3533   bnew        = (Mat_SeqAIJ *)Bnew->data;
3534   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3535   maij->B     = Bnew;
3536 
3537   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3538 
3539   b->free_a  = PETSC_FALSE;
3540   b->free_ij = PETSC_FALSE;
3541   PetscCall(MatDestroy(&B));
3542 
3543   bnew->free_a  = PETSC_TRUE;
3544   bnew->free_ij = PETSC_TRUE;
3545 
3546   /* condense columns of maij->B */
3547   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3548   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3549   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3550   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3551   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3552   PetscFunctionReturn(PETSC_SUCCESS);
3553 }
3554 
3555 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3556 
3557 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3558 {
3559   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3560   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3561   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3562   Mat             M, Msub, B = a->B;
3563   MatScalar      *aa;
3564   Mat_SeqAIJ     *aij;
3565   PetscInt       *garray = a->garray, *colsub, Ncols;
3566   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3567   IS              iscol_sub, iscmap;
3568   const PetscInt *is_idx, *cmap;
3569   PetscBool       allcolumns = PETSC_FALSE;
3570   MPI_Comm        comm;
3571 
3572   PetscFunctionBegin;
3573   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3574   if (call == MAT_REUSE_MATRIX) {
3575     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3576     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3577     PetscCall(ISGetLocalSize(iscol_sub, &count));
3578 
3579     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3580     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3581 
3582     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3583     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3584 
3585     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3586 
3587   } else { /* call == MAT_INITIAL_MATRIX */
3588     PetscBool flg;
3589 
3590     PetscCall(ISGetLocalSize(iscol, &n));
3591     PetscCall(ISGetSize(iscol, &Ncols));
3592 
3593     /* (1) iscol -> nonscalable iscol_local */
3594     /* Check for special case: each processor gets entire matrix columns */
3595     PetscCall(ISIdentity(iscol_local, &flg));
3596     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3597     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3598     if (allcolumns) {
3599       iscol_sub = iscol_local;
3600       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3601       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3602 
3603     } else {
3604       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3605       PetscInt *idx, *cmap1, k;
3606       PetscCall(PetscMalloc1(Ncols, &idx));
3607       PetscCall(PetscMalloc1(Ncols, &cmap1));
3608       PetscCall(ISGetIndices(iscol_local, &is_idx));
3609       count = 0;
3610       k     = 0;
3611       for (i = 0; i < Ncols; i++) {
3612         j = is_idx[i];
3613         if (j >= cstart && j < cend) {
3614           /* diagonal part of mat */
3615           idx[count]     = j;
3616           cmap1[count++] = i; /* column index in submat */
3617         } else if (Bn) {
3618           /* off-diagonal part of mat */
3619           if (j == garray[k]) {
3620             idx[count]     = j;
3621             cmap1[count++] = i; /* column index in submat */
3622           } else if (j > garray[k]) {
3623             while (j > garray[k] && k < Bn - 1) k++;
3624             if (j == garray[k]) {
3625               idx[count]     = j;
3626               cmap1[count++] = i; /* column index in submat */
3627             }
3628           }
3629         }
3630       }
3631       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3632 
3633       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3634       PetscCall(ISGetBlockSize(iscol, &cbs));
3635       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3636 
3637       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3638     }
3639 
3640     /* (3) Create sequential Msub */
3641     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3642   }
3643 
3644   PetscCall(ISGetLocalSize(iscol_sub, &count));
3645   aij = (Mat_SeqAIJ *)Msub->data;
3646   ii  = aij->i;
3647   PetscCall(ISGetIndices(iscmap, &cmap));
3648 
3649   /*
3650       m - number of local rows
3651       Ncols - number of columns (same on all processors)
3652       rstart - first row in new global matrix generated
3653   */
3654   PetscCall(MatGetSize(Msub, &m, NULL));
3655 
3656   if (call == MAT_INITIAL_MATRIX) {
3657     /* (4) Create parallel newmat */
3658     PetscMPIInt rank, size;
3659     PetscInt    csize;
3660 
3661     PetscCallMPI(MPI_Comm_size(comm, &size));
3662     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3663 
3664     /*
3665         Determine the number of non-zeros in the diagonal and off-diagonal
3666         portions of the matrix in order to do correct preallocation
3667     */
3668 
3669     /* first get start and end of "diagonal" columns */
3670     PetscCall(ISGetLocalSize(iscol, &csize));
3671     if (csize == PETSC_DECIDE) {
3672       PetscCall(ISGetSize(isrow, &mglobal));
3673       if (mglobal == Ncols) { /* square matrix */
3674         nlocal = m;
3675       } else {
3676         nlocal = Ncols / size + ((Ncols % size) > rank);
3677       }
3678     } else {
3679       nlocal = csize;
3680     }
3681     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3682     rstart = rend - nlocal;
3683     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3684 
3685     /* next, compute all the lengths */
3686     jj = aij->j;
3687     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3688     olens = dlens + m;
3689     for (i = 0; i < m; i++) {
3690       jend = ii[i + 1] - ii[i];
3691       olen = 0;
3692       dlen = 0;
3693       for (j = 0; j < jend; j++) {
3694         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3695         else dlen++;
3696         jj++;
3697       }
3698       olens[i] = olen;
3699       dlens[i] = dlen;
3700     }
3701 
3702     PetscCall(ISGetBlockSize(isrow, &bs));
3703     PetscCall(ISGetBlockSize(iscol, &cbs));
3704 
3705     PetscCall(MatCreate(comm, &M));
3706     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3707     PetscCall(MatSetBlockSizes(M, bs, cbs));
3708     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3709     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3710     PetscCall(PetscFree(dlens));
3711 
3712   } else { /* call == MAT_REUSE_MATRIX */
3713     M = *newmat;
3714     PetscCall(MatGetLocalSize(M, &i, NULL));
3715     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3716     PetscCall(MatZeroEntries(M));
3717     /*
3718          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3719        rather than the slower MatSetValues().
3720     */
3721     M->was_assembled = PETSC_TRUE;
3722     M->assembled     = PETSC_FALSE;
3723   }
3724 
3725   /* (5) Set values of Msub to *newmat */
3726   PetscCall(PetscMalloc1(count, &colsub));
3727   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3728 
3729   jj = aij->j;
3730   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3731   for (i = 0; i < m; i++) {
3732     row = rstart + i;
3733     nz  = ii[i + 1] - ii[i];
3734     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3735     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3736     jj += nz;
3737     aa += nz;
3738   }
3739   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3740   PetscCall(ISRestoreIndices(iscmap, &cmap));
3741 
3742   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3743   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3744 
3745   PetscCall(PetscFree(colsub));
3746 
3747   /* save Msub, iscol_sub and iscmap used in processor for next request */
3748   if (call == MAT_INITIAL_MATRIX) {
3749     *newmat = M;
3750     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3751     PetscCall(MatDestroy(&Msub));
3752 
3753     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3754     PetscCall(ISDestroy(&iscol_sub));
3755 
3756     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3757     PetscCall(ISDestroy(&iscmap));
3758 
3759     if (iscol_local) {
3760       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3761       PetscCall(ISDestroy(&iscol_local));
3762     }
3763   }
3764   PetscFunctionReturn(PETSC_SUCCESS);
3765 }
3766 
3767 /*
3768     Not great since it makes two copies of the submatrix: first a local SeqAIJ
3769   and then the final result obtained by concatenating the local matrices.
3770   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3771 
3772   This requires a sequential iscol containing all the indices.
3773 */
3774 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3775 {
3776   PetscMPIInt rank, size;
3777   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3778   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3779   Mat         M, Mreuse;
3780   MatScalar  *aa, *vwork;
3781   MPI_Comm    comm;
3782   Mat_SeqAIJ *aij;
3783   PetscBool   colflag, allcolumns = PETSC_FALSE;
3784 
3785   PetscFunctionBegin;
3786   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3787   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3788   PetscCallMPI(MPI_Comm_size(comm, &size));
3789 
3790   /* Check for special case: each processor gets entire matrix columns */
3791   PetscCall(ISIdentity(iscol, &colflag));
3792   PetscCall(ISGetLocalSize(iscol, &n));
3793   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3794   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3795 
3796   if (call == MAT_REUSE_MATRIX) {
3797     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3798     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3799     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3800   } else {
3801     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3802   }
3803 
3804   /*
3805       m - number of local rows
3806       n - number of columns (same on all processors)
3807       rstart - first row in new global matrix generated
3808   */
3809   PetscCall(MatGetSize(Mreuse, &m, &n));
3810   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3811   if (call == MAT_INITIAL_MATRIX) {
3812     aij = (Mat_SeqAIJ *)Mreuse->data;
3813     ii  = aij->i;
3814     jj  = aij->j;
3815 
3816     /*
3817         Determine the number of non-zeros in the diagonal and off-diagonal
3818         portions of the matrix in order to do correct preallocation
3819     */
3820 
3821     /* first get start and end of "diagonal" columns */
3822     if (csize == PETSC_DECIDE) {
3823       PetscCall(ISGetSize(isrow, &mglobal));
3824       if (mglobal == n) { /* square matrix */
3825         nlocal = m;
3826       } else {
3827         nlocal = n / size + ((n % size) > rank);
3828       }
3829     } else {
3830       nlocal = csize;
3831     }
3832     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3833     rstart = rend - nlocal;
3834     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3835 
3836     /* next, compute all the lengths */
3837     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3838     olens = dlens + m;
3839     for (i = 0; i < m; i++) {
3840       jend = ii[i + 1] - ii[i];
3841       olen = 0;
3842       dlen = 0;
3843       for (j = 0; j < jend; j++) {
3844         if (*jj < rstart || *jj >= rend) olen++;
3845         else dlen++;
3846         jj++;
3847       }
3848       olens[i] = olen;
3849       dlens[i] = dlen;
3850     }
3851     PetscCall(MatCreate(comm, &M));
3852     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3853     PetscCall(MatSetBlockSizes(M, bs, cbs));
3854     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3855     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3856     PetscCall(PetscFree(dlens));
3857   } else {
3858     PetscInt ml, nl;
3859 
3860     M = *newmat;
3861     PetscCall(MatGetLocalSize(M, &ml, &nl));
3862     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3863     PetscCall(MatZeroEntries(M));
3864     /*
3865          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3866        rather than the slower MatSetValues().
3867     */
3868     M->was_assembled = PETSC_TRUE;
3869     M->assembled     = PETSC_FALSE;
3870   }
3871   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3872   aij = (Mat_SeqAIJ *)Mreuse->data;
3873   ii  = aij->i;
3874   jj  = aij->j;
3875 
3876   /* trigger copy to CPU if needed */
3877   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3878   for (i = 0; i < m; i++) {
3879     row   = rstart + i;
3880     nz    = ii[i + 1] - ii[i];
3881     cwork = jj;
3882     jj    = PetscSafePointerPlusOffset(jj, nz);
3883     vwork = aa;
3884     aa    = PetscSafePointerPlusOffset(aa, nz);
3885     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3886   }
3887   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3888 
3889   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3890   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3891   *newmat = M;
3892 
3893   /* save the submatrix on this process for a later MAT_REUSE_MATRIX request */
3894   if (call == MAT_INITIAL_MATRIX) {
3895     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3896     PetscCall(MatDestroy(&Mreuse));
3897   }
3898   PetscFunctionReturn(PETSC_SUCCESS);
3899 }
3900 
3901 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3902 {
3903   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3904   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3905   const PetscInt *JJ;
3906   PetscBool       nooffprocentries;
3907   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3908 
3909   PetscFunctionBegin;
3910   PetscCall(PetscLayoutSetUp(B->rmap));
3911   PetscCall(PetscLayoutSetUp(B->cmap));
3912   m       = B->rmap->n;
3913   cstart  = B->cmap->rstart;
3914   cend    = B->cmap->rend;
3915   rstart  = B->rmap->rstart;
3916   irstart = Ii[0];
3917 
3918   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3919 
3920   if (PetscDefined(USE_DEBUG)) {
3921     for (i = 0; i < m; i++) {
3922       nnz = Ii[i + 1] - Ii[i];
3923       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3924       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3925       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3926       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3927     }
3928   }
3929 
3930   for (i = 0; i < m; i++) {
3931     nnz     = Ii[i + 1] - Ii[i];
3932     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3933     nnz_max = PetscMax(nnz_max, nnz);
3934     d       = 0;
3935     for (j = 0; j < nnz; j++) {
3936       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3937     }
3938     d_nnz[i] = d;
3939     o_nnz[i] = nnz - d;
3940   }
3941   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3942   PetscCall(PetscFree2(d_nnz, o_nnz));
3943 
3944   for (i = 0; i < m; i++) {
3945     ii = i + rstart;
3946     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3947   }
3948   nooffprocentries    = B->nooffprocentries;
3949   B->nooffprocentries = PETSC_TRUE;
3950   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3951   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3952   B->nooffprocentries = nooffprocentries;
3953 
3954   /* count number of entries below block diagonal */
3955   PetscCall(PetscFree(Aij->ld));
3956   PetscCall(PetscCalloc1(m, &ld));
3957   Aij->ld = ld;
3958   for (i = 0; i < m; i++) {
3959     nnz = Ii[i + 1] - Ii[i];
3960     j   = 0;
3961     while (j < nnz && J[j] < cstart) j++;
3962     ld[i] = j;
3963     if (J) J += nnz;
3964   }
3965 
3966   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3967   PetscFunctionReturn(PETSC_SUCCESS);
3968 }
3969 
3970 /*@
3971   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3972   (the default parallel PETSc format).
3973 
3974   Collective
3975 
3976   Input Parameters:
3977 + B - the matrix
3978 . i - the indices into `j` for the start of each local row (indices start with zero)
3979 . j - the column indices for each local row (indices start with zero)
3980 - v - optional values in the matrix
3981 
3982   Level: developer
3983 
3984   Notes:
3985   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3986   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3987   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3988 
3989   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
3990 
3991   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3992 
3993   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3994 
3995   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3996   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3997 
3998   The format used for the sparse matrix input is equivalent to a
3999   row-major ordering, i.e., for the following matrix, the input data expected is
4000   as shown, with a usage sketch following the layout
4001 .vb
4002         1 0 0
4003         2 0 3     P0
4004        -------
4005         4 5 6     P1
4006 
4007      Process0 [P0] rows_owned=[0,1]
4008         i =  {0,1,3}  [size = nrow+1  = 2+1]
4009         j =  {0,0,2}  [size = 3]
4010         v =  {1,2,3}  [size = 3]
4011 
4012      Process1 [P1] rows_owned=[2]
4013         i =  {0,3}    [size = nrow+1  = 1+1]
4014         j =  {0,1,2}  [size = 3]
4015         v =  {4,5,6}  [size = 3]
4016 .ve
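
  As a rough usage sketch for the layout above (illustrative only; it assumes two MPI ranks, `rank` holds
  the MPI rank, and error checking is omitted), each process passes just its own rows
.vb
  Mat         A;
  PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};   // rank 0 owns rows 0-1
  PetscScalar v0[] = {1.0, 2.0, 3.0};
  PetscInt    i1[] = {0, 3}, j1[] = {0, 1, 2};      // rank 1 owns row 2
  PetscScalar v1[] = {4.0, 5.0, 6.0};

  MatCreate(PETSC_COMM_WORLD, &A);
  MatSetSizes(A, rank ? 1 : 2, PETSC_DECIDE, 3, 3); // 2 local rows on rank 0, 1 on rank 1
  MatSetType(A, MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(A, rank ? i1 : i0, rank ? j1 : j0, rank ? v1 : v0);
.ve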
4017 
4018 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4019           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4020 @*/
4021 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4022 {
4023   PetscFunctionBegin;
4024   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4025   PetscFunctionReturn(PETSC_SUCCESS);
4026 }
4027 
4028 /*@
4029   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4030   (the default parallel PETSc format).  For good matrix assembly performance
4031   the user should preallocate the matrix storage by setting the parameters
4032   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4033 
4034   Collective
4035 
4036   Input Parameters:
4037 + B     - the matrix
4038 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4039            (same value is used for all local rows)
4040 . d_nnz - array containing the number of nonzeros in the various rows of the
4041            DIAGONAL portion of the local submatrix (possibly different for each row)
4042            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4043            The size of this array is equal to the number of local rows, i.e., 'm'.
4044            For matrices that will be factored, you must leave room for (and set)
4045            the diagonal entry even if it is zero.
4046 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4047            submatrix (same value is used for all local rows).
4048 - o_nnz - array containing the number of nonzeros in the various rows of the
4049            OFF-DIAGONAL portion of the local submatrix (possibly different for
4050            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4051            structure. The size of this array is equal to the number
4052            of local rows, i.e., 'm'.
4053 
4054   Example Usage:
4055   Consider the following 8x8 matrix with 34 non-zero values, that is
4056   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4057   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4058   as follows
4059 
4060 .vb
4061             1  2  0  |  0  3  0  |  0  4
4062     Proc0   0  5  6  |  7  0  0  |  8  0
4063             9  0 10  | 11  0  0  | 12  0
4064     -------------------------------------
4065            13  0 14  | 15 16 17  |  0  0
4066     Proc1   0 18  0  | 19 20 21  |  0  0
4067             0  0  0  | 22 23  0  | 24  0
4068     -------------------------------------
4069     Proc2  25 26 27  |  0  0 28  | 29  0
4070            30  0  0  | 31 32 33  |  0 34
4071 .ve
4072 
4073   This can be represented as a collection of submatrices as
4074 .vb
4075       A B C
4076       D E F
4077       G H I
4078 .ve
4079 
4080   Where the submatrices A,B,C are owned by proc0, D,E,F are
4081   owned by proc1, G,H,I are owned by proc2.
4082 
4083   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4084   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4085   The 'M','N' parameters are 8,8, and have the same values on all procs.
4086 
4087   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4088   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4089   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4090   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4091   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4092   matrix, and [DF] as another `MATSEQAIJ` matrix.
4093 
4094   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4095   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4096   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4097   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4098   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4099   In this case, the values of `d_nz`, `o_nz` are
4100 .vb
4101      proc0  dnz = 2, o_nz = 2
4102      proc1  dnz = 3, o_nz = 2
4103      proc2  dnz = 1, o_nz = 4
4104 .ve
4105   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4106   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4107   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4108   34 values.
4109 
4110   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4111   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4112   In the above case the values for `d_nnz`, `o_nnz` are
4113 .vb
4114      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4115      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4116      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4117 .ve
4118   Here the space allocated is the sum of all the above values, i.e., 34, and
4119   hence pre-allocation is perfect.
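
  As a rough sketch of the corresponding calls (illustrative only; the counts are the rank 0 values from the
  example above and error checking is omitted), rank 0 could preallocate its three rows with
.vb
  Mat      B;
  PetscInt d_nnz[] = {2, 2, 2}, o_nnz[] = {2, 2, 2};  // per-row counts for rank 0 of the 8x8 example

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 3, 3, 8, 8);                         // m = n = 3 on this rank
  MatSetType(B, MATMPIAIJ);
  MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);   // d_nz/o_nz are ignored since the arrays are given
.ve
  with ranks 1 and 2 supplying their own per-row arrays in the same way.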
4120 
4121   Level: intermediate
4122 
4123   Notes:
4124   If the *_nnz parameter is given then the *_nz parameter is ignored
4125 
4126   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4127   storage.  The stored row and column indices begin with zero.
4128   See [Sparse Matrices](sec_matsparse) for details.
4129 
4130   The parallel matrix is partitioned such that the first m0 rows belong to
4131   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4132   to process 2, etc., where m0, m1, m2, ... are the input parameter 'm'.
4133 
4134   The DIAGONAL portion of the local submatrix of a processor can be defined
4135   as the submatrix which is obtained by extracting the part corresponding to
4136   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4137   first row that belongs to the processor, r2 is the last row belonging to
4138   this processor, and c1-c2 is the range of indices of the local part of a
4139   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4140   common case of a square matrix, the row and column ranges are the same and
4141   the DIAGONAL part is also square. The remaining portion of the local
4142   submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4143 
4144   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4145 
4146   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4147   for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4148   You can also run with the option `-info` and look for messages with the string
4149   malloc in them to see if additional memory allocation was needed.
4150 
4151 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4152           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4153 @*/
4154 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4155 {
4156   PetscFunctionBegin;
4157   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4158   PetscValidType(B, 1);
4159   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4160   PetscFunctionReturn(PETSC_SUCCESS);
4161 }
4162 
4163 /*@
4164   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4165   CSR format.
4166 
4167   Collective
4168 
4169   Input Parameters:
4170 + comm - MPI communicator
4171 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4172 . n    - This value should be the same as the local size used in creating the
4173          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4174          calculated if `N` is given) For square matrices n is almost always `m`.
4175 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4176 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4177 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4178 . j    - global column indices
4179 - a    - optional matrix values
4180 
4181   Output Parameter:
4182 . mat - the matrix
4183 
4184   Level: intermediate
4185 
4186   Notes:
4187   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4188   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4189   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4190 
4191   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
4192 
4193   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4194 
4195   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4196   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4197 
4198   The format used for the sparse matrix input is equivalent to a
4199   row-major ordering, i.e., for the following matrix, the input data expected is
4200   as shown
4201 .vb
4202         1 0 0
4203         2 0 3     P0
4204        -------
4205         4 5 6     P1
4206 
4207      Process0 [P0] rows_owned=[0,1]
4208         i =  {0,1,3}  [size = nrow+1  = 2+1]
4209         j =  {0,0,2}  [size = 3]
4210         v =  {1,2,3}  [size = 3]
4211 
4212      Process1 [P1] rows_owned=[2]
4213         i =  {0,3}    [size = nrow+1  = 1+1]
4214         j =  {0,1,2}  [size = 3]
4215         v =  {4,5,6}  [size = 3]
4216 .ve
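
  A minimal call sketch for the two-rank layout above (illustrative only; `rank` is assumed to hold the MPI
  rank and error checking is omitted)
.vb
  Mat         A;
  PetscInt    i0[] = {0, 1, 3}, j0[] = {0, 0, 2};   // rank 0: rows 0 and 1
  PetscScalar v0[] = {1.0, 2.0, 3.0};
  PetscInt    i1[] = {0, 3}, j1[] = {0, 1, 2};      // rank 1: row 2
  PetscScalar v1[] = {4.0, 5.0, 6.0};

  if (rank == 0) MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, 3, 3, i0, j0, v0, &A);
  else           MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 1, PETSC_DECIDE, 3, 3, i1, j1, v1, &A);
.ve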
4217 
4218 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4219           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4220 @*/
4221 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4222 {
4223   PetscFunctionBegin;
4224   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4225   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4226   PetscCall(MatCreate(comm, mat));
4227   PetscCall(MatSetSizes(*mat, m, n, M, N));
4228   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4229   PetscCall(MatSetType(*mat, MATMPIAIJ));
4230   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4231   PetscFunctionReturn(PETSC_SUCCESS);
4232 }
4233 
4234 /*@
4235   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4236   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4237   to `MatCreateMPIAIJWithArrays()`
4238 
4239   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4240 
4241   Collective
4242 
4243   Input Parameters:
4244 + mat - the matrix
4245 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4246 . n   - This value should be the same as the local size used in creating the
4247        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4248        calculated if N is given) For square matrices n is almost always m.
4249 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4250 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4251 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4252 . J   - column indices
4253 - v   - matrix values
4254 
4255   Level: deprecated
4256 
4257 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4258           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4259 @*/
4260 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4261 {
4262   PetscInt        nnz, i;
4263   PetscBool       nooffprocentries;
4264   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4265   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4266   PetscScalar    *ad, *ao;
4267   PetscInt        ldi, Iii, md;
4268   const PetscInt *Adi = Ad->i;
4269   PetscInt       *ld  = Aij->ld;
4270 
4271   PetscFunctionBegin;
4272   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4273   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4274   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4275   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4276 
4277   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4278   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4279 
4280   for (i = 0; i < m; i++) {
4281     if (PetscDefined(USE_DEBUG)) {
4282       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4283         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4284         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4285       }
4286     }
4287     nnz = Ii[i + 1] - Ii[i];
4288     Iii = Ii[i];
4289     ldi = ld[i];
4290     md  = Adi[i + 1] - Adi[i];
4291     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4292     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4293     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4294     ad += md;
4295     ao += nnz - md;
4296   }
4297   nooffprocentries      = mat->nooffprocentries;
4298   mat->nooffprocentries = PETSC_TRUE;
4299   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4300   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4301   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4302   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4303   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4304   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4305   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4306   mat->nooffprocentries = nooffprocentries;
4307   PetscFunctionReturn(PETSC_SUCCESS);
4308 }
4309 
4310 /*@
4311   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4312 
4313   Collective
4314 
4315   Input Parameters:
4316 + mat - the matrix
4317 - v   - matrix values, stored by row
4318 
4319   Level: intermediate
4320 
4321   Notes:
4322   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4323 
4324   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
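
  A minimal sketch of the intended usage (illustrative only; `m`, `n`, `M`, `N`, `i`, `j`, `v`, and `vnew` are
  assumed to be valid CSR data with sorted column indices and an unchanged nonzero pattern)
.vb
  Mat A;

  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, m, n, M, N, i, j, v, &A);
  // ... use A ...
  MatUpdateMPIAIJWithArray(A, vnew);   // replaces only the numerical values
.ve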
4325 
4326 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4327           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4328 @*/
4329 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4330 {
4331   PetscInt        nnz, i, m;
4332   PetscBool       nooffprocentries;
4333   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4334   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4335   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4336   PetscScalar    *ad, *ao;
4337   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4338   PetscInt        ldi, Iii, md;
4339   PetscInt       *ld = Aij->ld;
4340 
4341   PetscFunctionBegin;
4342   m = mat->rmap->n;
4343 
4344   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4345   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4346   Iii = 0;
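  /* each CSR row of v is laid out as [off-diagonal entries left of the diagonal block | diagonal block | remaining off-diagonal entries];
     ld[i], computed at preallocation time, gives the size of the first piece and Ad->i the size of the diagonal piece */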
4347   for (i = 0; i < m; i++) {
4348     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4349     ldi = ld[i];
4350     md  = Adi[i + 1] - Adi[i];
4351     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4352     ad += md;
4353     if (ao) {
4354       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4355       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4356       ao += nnz - md;
4357     }
4358     Iii += nnz;
4359   }
4360   nooffprocentries      = mat->nooffprocentries;
4361   mat->nooffprocentries = PETSC_TRUE;
4362   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4363   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4364   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4365   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4366   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4367   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4368   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4369   mat->nooffprocentries = nooffprocentries;
4370   PetscFunctionReturn(PETSC_SUCCESS);
4371 }
4372 
4373 /*@
4374   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4375   (the default parallel PETSc format).  For good matrix assembly performance
4376   the user should preallocate the matrix storage by setting the parameters
4377   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4378 
4379   Collective
4380 
4381   Input Parameters:
4382 + comm  - MPI communicator
4383 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4384           This value should be the same as the local size used in creating the
4385           y vector for the matrix-vector product y = Ax.
4386 . n     - This value should be the same as the local size used in creating the
4387           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4388           calculated if N is given) For square matrices n is almost always m.
4389 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4390 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4391 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4392           (same value is used for all local rows)
4393 . d_nnz - array containing the number of nonzeros in the various rows of the
4394           DIAGONAL portion of the local submatrix (possibly different for each row)
4395           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4396           The size of this array is equal to the number of local rows, i.e., 'm'.
4397 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4398           submatrix (same value is used for all local rows).
4399 - o_nnz - array containing the number of nonzeros in the various rows of the
4400           OFF-DIAGONAL portion of the local submatrix (possibly different for
4401           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4402           structure. The size of this array is equal to the number
4403           of local rows, i.e., 'm'.
4404 
4405   Output Parameter:
4406 . A - the matrix
4407 
4408   Options Database Keys:
4409 + -mat_no_inode                     - Do not use inodes
4410 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4411 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4412                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4413                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4414 
4415   Level: intermediate
4416 
4417   Notes:
4418   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4419   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4420   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4421 
4422   If the *_nnz parameter is given then the *_nz parameter is ignored
4423 
4424   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4425   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4426   storage requirements for this matrix.
4427 
4428   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4429   processor then it must be used on all processors that share the object for
4430   that argument.
4431 
4432   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4433   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4434 
4435   The user MUST specify either the local or global matrix dimensions
4436   (possibly both).
4437 
4438   The parallel matrix is partitioned across processors such that the
4439   first `m0` rows belong to process 0, the next `m1` rows belong to
4440   process 1, the next `m2` rows belong to process 2, etc., where
4441   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4442   values corresponding to an [m x N] submatrix.
4443 
4444   The columns are logically partitioned with the n0 columns belonging
4445   to the 0th partition, the next n1 columns belonging to the next
4446   partition, etc., where n0, n1, n2, ... are the input parameter 'n'.
4447 
4448   The DIAGONAL portion of the local submatrix on any given processor
4449   is the submatrix corresponding to the rows and columns m, n
4450   owned by the given processor, i.e., the diagonal matrix on
4451   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4452   etc. The remaining portion of the local submatrix [m x (N-n)]
4453   constitutes the OFF-DIAGONAL portion. The example below better
4454   illustrates this concept. The two matrices, the DIAGONAL portion and
4455   the OFF-DIAGONAL portion, are each stored as `MATSEQAIJ` matrices.
4456 
4457   For a square global matrix we define each processor's diagonal portion
4458   to be its local rows and the corresponding columns (a square submatrix);
4459   each processor's off-diagonal portion encompasses the remainder of the
4460   local matrix (a rectangular submatrix).
4461 
4462   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4463 
4464   When calling this routine with a single process communicator, a matrix of
4465   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4466   type of communicator, use the construction mechanism
4467 .vb
4468   MatCreate(..., &A);
4469   MatSetType(A, MATMPIAIJ);
4470   MatSetSizes(A, m, n, M, N);
4471   MatMPIAIJSetPreallocation(A, ...);
4472 .ve
4473 
4474   By default, this format uses inodes (identical nodes) when possible.
4475   We search for consecutive rows with the same nonzero structure, thereby
4476   reusing matrix information to achieve increased efficiency.
4477 
4478   Example Usage:
4479   Consider the following 8x8 matrix with 34 non-zero values, that is
4480   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4481   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4482   as follows
4483 
4484 .vb
4485             1  2  0  |  0  3  0  |  0  4
4486     Proc0   0  5  6  |  7  0  0  |  8  0
4487             9  0 10  | 11  0  0  | 12  0
4488     -------------------------------------
4489            13  0 14  | 15 16 17  |  0  0
4490     Proc1   0 18  0  | 19 20 21  |  0  0
4491             0  0  0  | 22 23  0  | 24  0
4492     -------------------------------------
4493     Proc2  25 26 27  |  0  0 28  | 29  0
4494            30  0  0  | 31 32 33  |  0 34
4495 .ve
4496 
4497   This can be represented as a collection of submatrices as
4498 
4499 .vb
4500       A B C
4501       D E F
4502       G H I
4503 .ve
4504 
4505   Where the submatrices A,B,C are owned by proc0, D,E,F are
4506   owned by proc1, G,H,I are owned by proc2.
4507 
4508   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4509   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4510   The 'M','N' parameters are 8,8, and have the same values on all procs.
4511 
4512   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4513   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4514   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4515   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4516   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4517   matrix, and [DF] as another `MATSEQAIJ` matrix.
4518 
4519   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4520   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4521   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4522   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4523   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4524   In this case, the values of `d_nz`,`o_nz` are
4525 .vb
4526      proc0  dnz = 2, o_nz = 2
4527      proc1  dnz = 3, o_nz = 2
4528      proc2  dnz = 1, o_nz = 4
4529 .ve
4530   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4531   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4532   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4533   34 values.
4534 
4535   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4536   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4537   In the above case the values for `d_nnz`, `o_nnz` are
4538 .vb
4539      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4540      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4541      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4542 .ve
4543   Here the space allocated is the sum of all the above values, i.e., 34, and
4544   hence pre-allocation is perfect.
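
  Putting the example together, a rough call sketch for rank 1 of the 8x8 matrix above (illustrative only;
  each rank passes its own local sizes and per-row arrays, and error checking is omitted)
.vb
  Mat      A;
  PetscInt d_nnz[] = {3, 3, 2}, o_nnz[] = {2, 1, 1};  // per-row counts for rank 1 of the 8x8 example

  MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
  // rows are then filled with MatSetValues() and assembled with MatAssemblyBegin()/MatAssemblyEnd()
.ve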
4545 
4546 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4547           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4548           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4549 @*/
4550 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4551 {
4552   PetscMPIInt size;
4553 
4554   PetscFunctionBegin;
4555   PetscCall(MatCreate(comm, A));
4556   PetscCall(MatSetSizes(*A, m, n, M, N));
4557   PetscCallMPI(MPI_Comm_size(comm, &size));
4558   if (size > 1) {
4559     PetscCall(MatSetType(*A, MATMPIAIJ));
4560     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4561   } else {
4562     PetscCall(MatSetType(*A, MATSEQAIJ));
4563     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4564   }
4565   PetscFunctionReturn(PETSC_SUCCESS);
4566 }
4567 
4568 /*MC
4569     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4570 
4571     Synopsis:
4572     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4573 
4574     Not Collective
4575 
4576     Input Parameter:
4577 .   A - the `MATMPIAIJ` matrix
4578 
4579     Output Parameters:
4580 +   Ad - the diagonal portion of the matrix
4581 .   Ao - the off-diagonal portion of the matrix
4582 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4583 -   ierr - error code
4584 
4585      Level: advanced
4586 
4587     Note:
4588     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4589 
4590 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4591 M*/
4592 
4593 /*MC
4594     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4595 
4596     Synopsis:
4597     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4598 
4599     Not Collective
4600 
4601     Input Parameters:
4602 +   A - the `MATMPIAIJ` matrix
4603 .   Ad - the diagonal portion of the matrix
4604 .   Ao - the off-diagonal portion of the matrix
4605 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4606 -   ierr - error code
4607 
4608      Level: advanced
4609 
4610 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4611 M*/
4612 
4613 /*@C
4614   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4615 
4616   Not Collective
4617 
4618   Input Parameter:
4619 . A - The `MATMPIAIJ` matrix
4620 
4621   Output Parameters:
4622 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4623 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4624 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4625 
4626   Level: intermediate
4627 
4628   Note:
4629   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4630   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
4631   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4632   local column numbers to global column numbers in the original matrix.
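
  For instance, a short sketch (assuming `A` is an assembled `MATMPIAIJ` matrix) that recovers the global
  column number of a local column `k` of `Ao`
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;

  MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
  // the global column of local column k of Ao is colmap[k]
.ve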
4633 
4634   Fortran Notes:
4635   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4636 
4637 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4638 @*/
4639 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4640 {
4641   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4642   PetscBool   flg;
4643 
4644   PetscFunctionBegin;
4645   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4646   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4647   if (Ad) *Ad = a->A;
4648   if (Ao) *Ao = a->B;
4649   if (colmap) *colmap = a->garray;
4650   PetscFunctionReturn(PETSC_SUCCESS);
4651 }
4652 
4653 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4654 {
4655   PetscInt     m, N, i, rstart, nnz, Ii;
4656   PetscInt    *indx;
4657   PetscScalar *values;
4658   MatType      rootType;
4659 
4660   PetscFunctionBegin;
4661   PetscCall(MatGetSize(inmat, &m, &N));
4662   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4663     PetscInt *dnz, *onz, sum, bs, cbs;
4664 
4665     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4666     /* Check sum(n) = N */
4667     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4668     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4669 
4670     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4671     rstart -= m;
4672 
4673     MatPreallocateBegin(comm, m, n, dnz, onz);
4674     for (i = 0; i < m; i++) {
4675       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4676       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4677       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4678     }
4679 
4680     PetscCall(MatCreate(comm, outmat));
4681     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4682     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4683     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4684     PetscCall(MatGetRootType_Private(inmat, &rootType));
4685     PetscCall(MatSetType(*outmat, rootType));
4686     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4687     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4688     MatPreallocateEnd(dnz, onz);
4689     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4690   }
4691 
4692   /* numeric phase */
4693   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4694   for (i = 0; i < m; i++) {
4695     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4696     Ii = i + rstart;
4697     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4698     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4699   }
4700   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4701   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4702   PetscFunctionReturn(PETSC_SUCCESS);
4703 }
4704 
4705 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4706 {
4707   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4708 
4709   PetscFunctionBegin;
4710   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4711   PetscCall(PetscFree(merge->id_r));
4712   PetscCall(PetscFree(merge->len_s));
4713   PetscCall(PetscFree(merge->len_r));
4714   PetscCall(PetscFree(merge->bi));
4715   PetscCall(PetscFree(merge->bj));
4716   PetscCall(PetscFree(merge->buf_ri[0]));
4717   PetscCall(PetscFree(merge->buf_ri));
4718   PetscCall(PetscFree(merge->buf_rj[0]));
4719   PetscCall(PetscFree(merge->buf_rj));
4720   PetscCall(PetscFree(merge->coi));
4721   PetscCall(PetscFree(merge->coj));
4722   PetscCall(PetscFree(merge->owners_co));
4723   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4724   PetscCall(PetscFree(merge));
4725   PetscFunctionReturn(PETSC_SUCCESS);
4726 }
4727 
4728 #include <../src/mat/utils/freespace.h>
4729 #include <petscbt.h>
4730 
4731 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4732 {
4733   MPI_Comm             comm;
4734   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4735   PetscMPIInt          size, rank, taga, *len_s;
4736   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4737   PetscMPIInt          proc, k;
4738   PetscInt           **buf_ri, **buf_rj;
4739   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4740   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4741   MPI_Request         *s_waits, *r_waits;
4742   MPI_Status          *status;
4743   const MatScalar     *aa, *a_a;
4744   MatScalar          **abuf_r, *ba_i;
4745   Mat_Merge_SeqsToMPI *merge;
4746   PetscContainer       container;
4747 
4748   PetscFunctionBegin;
4749   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4750   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4751 
4752   PetscCallMPI(MPI_Comm_size(comm, &size));
4753   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4754 
4755   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4756   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4757   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4758   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4759   aa = a_a;
4760 
4761   bi     = merge->bi;
4762   bj     = merge->bj;
4763   buf_ri = merge->buf_ri;
4764   buf_rj = merge->buf_rj;
4765 
4766   PetscCall(PetscMalloc1(size, &status));
4767   owners = merge->rowmap->range;
4768   len_s  = merge->len_s;
4769 
4770   /* send and recv matrix values */
4771   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4772   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4773 
4774   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4775   for (proc = 0, k = 0; proc < size; proc++) {
4776     if (!len_s[proc]) continue;
4777     i = owners[proc];
4778     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4779     k++;
4780   }
4781 
4782   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4783   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4784   PetscCall(PetscFree(status));
4785 
4786   PetscCall(PetscFree(s_waits));
4787   PetscCall(PetscFree(r_waits));
4788 
4789   /* insert mat values of mpimat */
4790   PetscCall(PetscMalloc1(N, &ba_i));
4791   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4792 
4793   for (k = 0; k < merge->nrecv; k++) {
4794     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4795     nrows       = *buf_ri_k[k];
4796     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4797     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4798   }
4799 
4800   /* set values of ba */
4801   m = merge->rowmap->n;
4802   for (i = 0; i < m; i++) {
4803     arow = owners[rank] + i;
4804     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4805     bnzi = bi[i + 1] - bi[i];
4806     PetscCall(PetscArrayzero(ba_i, bnzi));
4807 
4808     /* add local non-zero vals of this proc's seqmat into ba */
4809     anzi   = ai[arow + 1] - ai[arow];
4810     aj     = a->j + ai[arow];
4811     aa     = a_a + ai[arow];
4812     nextaj = 0;
4813     for (j = 0; nextaj < anzi; j++) {
4814       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4815         ba_i[j] += aa[nextaj++];
4816       }
4817     }
4818 
4819     /* add received vals into ba */
4820     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4821       /* i-th row */
4822       if (i == *nextrow[k]) {
4823         anzi   = *(nextai[k] + 1) - *nextai[k];
4824         aj     = buf_rj[k] + *nextai[k];
4825         aa     = abuf_r[k] + *nextai[k];
4826         nextaj = 0;
4827         for (j = 0; nextaj < anzi; j++) {
4828           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4829             ba_i[j] += aa[nextaj++];
4830           }
4831         }
4832         nextrow[k]++;
4833         nextai[k]++;
4834       }
4835     }
4836     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4837   }
4838   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4839   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4840   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4841 
4842   PetscCall(PetscFree(abuf_r[0]));
4843   PetscCall(PetscFree(abuf_r));
4844   PetscCall(PetscFree(ba_i));
4845   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4846   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4847   PetscFunctionReturn(PETSC_SUCCESS);
4848 }
4849 
4850 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4851 {
4852   Mat                  B_mpi;
4853   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4854   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4855   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4856   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4857   PetscInt             len, *dnz, *onz, bs, cbs;
4858   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4859   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4860   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4861   MPI_Status          *status;
4862   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4863   PetscBT              lnkbt;
4864   Mat_Merge_SeqsToMPI *merge;
4865   PetscContainer       container;
4866 
4867   PetscFunctionBegin;
4868   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4869 
4870   /* make sure it is a PETSc comm */
4871   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4872   PetscCallMPI(MPI_Comm_size(comm, &size));
4873   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4874 
4875   PetscCall(PetscNew(&merge));
4876   PetscCall(PetscMalloc1(size, &status));
4877 
4878   /* determine row ownership */
4879   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4880   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4881   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4882   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4883   PetscCall(PetscLayoutSetUp(merge->rowmap));
4884   PetscCall(PetscMalloc1(size, &len_si));
4885   PetscCall(PetscMalloc1(size, &merge->len_s));
4886 
4887   m      = merge->rowmap->n;
4888   owners = merge->rowmap->range;
4889 
4890   /* determine the number of messages to send, their lengths */
4891   len_s = merge->len_s;
4892 
4893   len          = 0; /* length of buf_si[] */
4894   merge->nsend = 0;
4895   for (PetscMPIInt proc = 0; proc < size; proc++) {
4896     len_si[proc] = 0;
4897     if (proc == rank) {
4898       len_s[proc] = 0;
4899     } else {
4900       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4901       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4902     }
4903     if (len_s[proc]) {
4904       merge->nsend++;
4905       nrows = 0;
4906       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4907         if (ai[i + 1] > ai[i]) nrows++;
4908       }
4909       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4910       len += len_si[proc];
4911     }
4912   }
4913 
4914   /* determine the number and length of messages to receive for ij-structure */
4915   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4916   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4917 
4918   /* post the Irecv of j-structure */
4919   PetscCall(PetscCommGetNewTag(comm, &tagj));
4920   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4921 
4922   /* post the Isend of j-structure */
4923   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4924 
4925   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4926     if (!len_s[proc]) continue;
4927     i = owners[proc];
4928     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4929     k++;
4930   }
4931 
4932   /* receives and sends of j-structure are complete */
4933   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4934   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4935 
4936   /* send and recv i-structure */
4937   PetscCall(PetscCommGetNewTag(comm, &tagi));
4938   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4939 
4940   PetscCall(PetscMalloc1(len + 1, &buf_s));
4941   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4942   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4943     if (!len_s[proc]) continue;
4944     /* form outgoing message for i-structure:
4945          buf_si[0]:                 nrows to be sent
4946                [1:nrows]:           row index (global)
4947                [nrows+1:2*nrows+1]: i-structure index
4948     */
4949     nrows       = len_si[proc] / 2 - 1;
4950     buf_si_i    = buf_si + nrows + 1;
4951     buf_si[0]   = nrows;
4952     buf_si_i[0] = 0;
4953     nrows       = 0;
4954     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4955       anzi = ai[i + 1] - ai[i];
4956       if (anzi) {
4957         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4958         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4959         nrows++;
4960       }
4961     }
4962     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4963     k++;
4964     buf_si += len_si[proc];
4965   }
4966 
4967   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4968   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4969 
4970   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4971   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4972 
4973   PetscCall(PetscFree(len_si));
4974   PetscCall(PetscFree(len_ri));
4975   PetscCall(PetscFree(rj_waits));
4976   PetscCall(PetscFree2(si_waits, sj_waits));
4977   PetscCall(PetscFree(ri_waits));
4978   PetscCall(PetscFree(buf_s));
4979   PetscCall(PetscFree(status));
4980 
4981   /* compute a local seq matrix in each processor */
4982   /* allocate bi array and free space for accumulating nonzero column info */
4983   PetscCall(PetscMalloc1(m + 1, &bi));
4984   bi[0] = 0;
4985 
4986   /* create and initialize a linked list */
4987   nlnk = N + 1;
4988   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4989 
4990   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4991   len = ai[owners[rank + 1]] - ai[owners[rank]];
4992   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4993 
4994   current_space = free_space;
4995 
4996   /* determine symbolic info for each local row */
4997   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4998 
4999   for (k = 0; k < merge->nrecv; k++) {
5000     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5001     nrows       = *buf_ri_k[k];
5002     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5003     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5004   }
5005 
5006   MatPreallocateBegin(comm, m, n, dnz, onz);
5007   len = 0;
5008   for (i = 0; i < m; i++) {
5009     bnzi = 0;
5010     /* add local non-zero cols of this proc's seqmat into lnk */
5011     arow = owners[rank] + i;
5012     anzi = ai[arow + 1] - ai[arow];
5013     aj   = a->j + ai[arow];
5014     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5015     bnzi += nlnk;
5016     /* add received col data into lnk */
5017     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5018       if (i == *nextrow[k]) {            /* i-th row */
5019         anzi = *(nextai[k] + 1) - *nextai[k];
5020         aj   = buf_rj[k] + *nextai[k];
5021         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5022         bnzi += nlnk;
5023         nextrow[k]++;
5024         nextai[k]++;
5025       }
5026     }
5027     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5028 
5029     /* if free space is not available, make more free space */
5030     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5031     /* copy data into free space, then initialize lnk */
5032     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5033     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5034 
5035     current_space->array += bnzi;
5036     current_space->local_used += bnzi;
5037     current_space->local_remaining -= bnzi;
5038 
5039     bi[i + 1] = bi[i] + bnzi;
5040   }
5041 
5042   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5043 
5044   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5045   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5046   PetscCall(PetscLLDestroy(lnk, lnkbt));
5047 
5048   /* create symbolic parallel matrix B_mpi */
5049   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5050   PetscCall(MatCreate(comm, &B_mpi));
5051   if (n == PETSC_DECIDE) {
5052     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5053   } else {
5054     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5055   }
5056   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5057   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5058   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5059   MatPreallocateEnd(dnz, onz);
5060   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5061 
5062   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5063   B_mpi->assembled = PETSC_FALSE;
5064   merge->bi        = bi;
5065   merge->bj        = bj;
5066   merge->buf_ri    = buf_ri;
5067   merge->buf_rj    = buf_rj;
5068   merge->coi       = NULL;
5069   merge->coj       = NULL;
5070   merge->owners_co = NULL;
5071 
5072   PetscCall(PetscCommDestroy(&comm));
5073 
5074   /* attach the supporting struct to B_mpi for reuse */
5075   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5076   PetscCall(PetscContainerSetPointer(container, merge));
5077   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5078   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5079   PetscCall(PetscContainerDestroy(&container));
5080   *mpimat = B_mpi;
5081 
5082   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5083   PetscFunctionReturn(PETSC_SUCCESS);
5084 }
5085 
5086 /*@
5087   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5088   matrices from each processor
5089 
5090   Collective
5091 
5092   Input Parameters:
5093 + comm   - the communicator the parallel matrix will live on
5094 . seqmat - the input sequential matrix on each MPI process
5095 . m      - number of local rows (or `PETSC_DECIDE`)
5096 . n      - number of local columns (or `PETSC_DECIDE`)
5097 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5098 
5099   Output Parameter:
5100 . mpimat - the parallel matrix generated
5101 
5102   Level: advanced
5103 
5104   Note:
5105   The dimensions of the sequential matrix in each processor MUST be the same.
5106   The input seqmat is kept in the container "Mat_Merge_SeqsToMPI" attached to `mpimat`, and will be
5107   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5108 
5109 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5110 @*/
5111 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5112 {
5113   PetscMPIInt size;
5114 
5115   PetscFunctionBegin;
5116   PetscCallMPI(MPI_Comm_size(comm, &size));
5117   if (size == 1) {
5118     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5119     if (scall == MAT_INITIAL_MATRIX) {
5120       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5121     } else {
5122       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5123     }
5124     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5125     PetscFunctionReturn(PETSC_SUCCESS);
5126   }
5127   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5128   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5129   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5130   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5131   PetscFunctionReturn(PETSC_SUCCESS);
5132 }
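
/*
  Editorial usage sketch (not part of the PETSc sources): each MPI process builds a SeqAIJ matrix of the
  same global dimensions and this routine sums them into a single MATMPIAIJ. The sizes and values below
  are illustrative only.

    Mat      seqmat, mpimat;
    PetscInt N = 10;

    PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, N, N, 1, NULL, &seqmat));
    PetscCall(MatSetValue(seqmat, 2, 3, 1.0, ADD_VALUES));
    PetscCall(MatAssemblyBegin(seqmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(seqmat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat));
    ...
    PetscCall(MatDestroy(&mpimat));   (per the Note above, seqmat is destroyed along with mpimat)
*/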
5133 
5134 /*@
5135   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5136 
5137   Not Collective
5138 
5139   Input Parameter:
5140 . A - the matrix
5141 
5142   Output Parameter:
5143 . A_loc - the local sequential matrix generated
5144 
5145   Level: developer
5146 
5147   Notes:
5148   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5149   with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and
5150   `n` is the global column count obtained with `MatGetSize()`.
5151 
5152   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5153 
5154   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5155 
5156   Destroy the matrix with `MatDestroy()`
5157 
5158 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5159 @*/
5160 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5161 {
5162   PetscBool mpi;
5163 
5164   PetscFunctionBegin;
5165   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5166   if (mpi) {
5167     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5168   } else {
5169     *A_loc = A;
5170     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5171   }
5172   PetscFunctionReturn(PETSC_SUCCESS);
5173 }
5174 
5175 /*@
5176   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5177 
5178   Not Collective
5179 
5180   Input Parameters:
5181 + A     - the matrix
5182 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5183 
5184   Output Parameter:
5185 . A_loc - the local sequential matrix generated
5186 
5187   Level: developer
5188 
5189   Notes:
5190   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5191   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5192   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5193 
5194   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5195 
5196   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5197   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5198   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5199   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5200 
5201 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5202 @*/
5203 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5204 {
5205   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5206   Mat_SeqAIJ        *mat, *a, *b;
5207   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5208   const PetscScalar *aa, *ba, *aav, *bav;
5209   PetscScalar       *ca, *cam;
5210   PetscMPIInt        size;
5211   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5212   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5213   PetscBool          match;
5214 
5215   PetscFunctionBegin;
5216   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5217   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5218   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5219   if (size == 1) {
5220     if (scall == MAT_INITIAL_MATRIX) {
5221       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5222       *A_loc = mpimat->A;
5223     } else if (scall == MAT_REUSE_MATRIX) {
5224       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5225     }
5226     PetscFunctionReturn(PETSC_SUCCESS);
5227   }
5228 
5229   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5230   a  = (Mat_SeqAIJ *)mpimat->A->data;
5231   b  = (Mat_SeqAIJ *)mpimat->B->data;
5232   ai = a->i;
5233   aj = a->j;
5234   bi = b->i;
5235   bj = b->j;
5236   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5237   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5238   aa = aav;
5239   ba = bav;
5240   if (scall == MAT_INITIAL_MATRIX) {
5241     PetscCall(PetscMalloc1(1 + am, &ci));
5242     ci[0] = 0;
5243     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5244     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5245     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5246     k = 0;
5247     for (i = 0; i < am; i++) {
5248       ncols_o = bi[i + 1] - bi[i];
5249       ncols_d = ai[i + 1] - ai[i];
5250       /* off-diagonal portion of A */
5251       for (jo = 0; jo < ncols_o; jo++) {
5252         col = cmap[*bj];
5253         if (col >= cstart) break;
5254         cj[k] = col;
5255         bj++;
5256         ca[k++] = *ba++;
5257       }
5258       /* diagonal portion of A */
5259       for (j = 0; j < ncols_d; j++) {
5260         cj[k]   = cstart + *aj++;
5261         ca[k++] = *aa++;
5262       }
5263       /* off-diagonal portion of A */
5264       for (j = jo; j < ncols_o; j++) {
5265         cj[k]   = cmap[*bj++];
5266         ca[k++] = *ba++;
5267       }
5268     }
5269     /* put together the new matrix */
5270     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5271     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5272     /* Since these are PETSc arrays, change flags to free them as necessary. */
5273     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5274     mat->free_a  = PETSC_TRUE;
5275     mat->free_ij = PETSC_TRUE;
5276     mat->nonew   = 0;
5277   } else if (scall == MAT_REUSE_MATRIX) {
5278     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5279     ci  = mat->i;
5280     cj  = mat->j;
5281     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5282     for (i = 0; i < am; i++) {
5283       /* off-diagonal portion of A */
5284       ncols_o = bi[i + 1] - bi[i];
5285       for (jo = 0; jo < ncols_o; jo++) {
5286         col = cmap[*bj];
5287         if (col >= cstart) break;
5288         *cam++ = *ba++;
5289         bj++;
5290       }
5291       /* diagonal portion of A */
5292       ncols_d = ai[i + 1] - ai[i];
5293       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5294       /* off-diagonal portion of A */
5295       for (j = jo; j < ncols_o; j++) {
5296         *cam++ = *ba++;
5297         bj++;
5298       }
5299     }
5300     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5301   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5302   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5303   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5304   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5305   PetscFunctionReturn(PETSC_SUCCESS);
5306 }
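
/*
  Editorial usage sketch (not part of the PETSc sources): extract the local rows of an assembled
  MATMPIAIJ matrix A into a SeqAIJ matrix, then refresh it after A's numerical values change while
  its nonzero pattern stays the same.

    Mat A_loc;

    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
    ... use A_loc ...
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
    PetscCall(MatDestroy(&A_loc));
*/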
5307 
5308 /*@
5309   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5310   mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5311 
5312   Not Collective
5313 
5314   Input Parameters:
5315 + A     - the matrix
5316 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5317 
5318   Output Parameters:
5319 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5320 - A_loc - the local sequential matrix generated
5321 
5322   Level: developer
5323 
5324   Note:
5325   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5326   part, followed by those associated with the off-diagonal part (in its local ordering).
5327 
5328 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5329 @*/
5330 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5331 {
5332   Mat             Ao, Ad;
5333   const PetscInt *cmap;
5334   PetscMPIInt     size;
5335   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5336 
5337   PetscFunctionBegin;
5338   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5339   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5340   if (size == 1) {
5341     if (scall == MAT_INITIAL_MATRIX) {
5342       PetscCall(PetscObjectReference((PetscObject)Ad));
5343       *A_loc = Ad;
5344     } else if (scall == MAT_REUSE_MATRIX) {
5345       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5346     }
5347     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5348     PetscFunctionReturn(PETSC_SUCCESS);
5349   }
5350   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5351   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5352   if (f) {
5353     PetscCall((*f)(A, scall, glob, A_loc));
5354   } else {
5355     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5356     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5357     Mat_SeqAIJ        *c;
5358     PetscInt          *ai = a->i, *aj = a->j;
5359     PetscInt          *bi = b->i, *bj = b->j;
5360     PetscInt          *ci, *cj;
5361     const PetscScalar *aa, *ba;
5362     PetscScalar       *ca;
5363     PetscInt           i, j, am, dn, on;
5364 
5365     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5366     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5367     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5368     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5369     if (scall == MAT_INITIAL_MATRIX) {
5370       PetscInt k;
5371       PetscCall(PetscMalloc1(1 + am, &ci));
5372       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5373       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5374       ci[0] = 0;
5375       for (i = 0, k = 0; i < am; i++) {
5376         const PetscInt ncols_o = bi[i + 1] - bi[i];
5377         const PetscInt ncols_d = ai[i + 1] - ai[i];
5378         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5379         /* diagonal portion of A */
5380         for (j = 0; j < ncols_d; j++, k++) {
5381           cj[k] = *aj++;
5382           ca[k] = *aa++;
5383         }
5384         /* off-diagonal portion of A */
5385         for (j = 0; j < ncols_o; j++, k++) {
5386           cj[k] = dn + *bj++;
5387           ca[k] = *ba++;
5388         }
5389       }
5390       /* put together the new matrix */
5391       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5392       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5393       /* Since these are PETSc arrays, change flags to free them as necessary. */
5394       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5395       c->free_a  = PETSC_TRUE;
5396       c->free_ij = PETSC_TRUE;
5397       c->nonew   = 0;
5398       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5399     } else if (scall == MAT_REUSE_MATRIX) {
5400       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5401       for (i = 0; i < am; i++) {
5402         const PetscInt ncols_d = ai[i + 1] - ai[i];
5403         const PetscInt ncols_o = bi[i + 1] - bi[i];
5404         /* diagonal portion of A */
5405         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5406         /* off-diagonal portion of A */
5407         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5408       }
5409       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5410     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5411     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5412     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba)); /* ba, not aa: restore the array obtained from Ao above */
5413     if (glob) {
5414       PetscInt cst, *gidx;
5415 
5416       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5417       PetscCall(PetscMalloc1(dn + on, &gidx));
5418       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5419       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5420       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5421     }
5422   }
5423   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5424   PetscFunctionReturn(PETSC_SUCCESS);
5425 }
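
/*
  Editorial usage sketch (not part of the PETSc sources): obtain the merged local matrix of an
  assembled MATMPIAIJ matrix A together with the IS mapping its local column indices back to the
  global column indices of A.

    Mat A_loc;
    IS  glob;

    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
    the first columns of A_loc form the diagonal block, the remaining ones the off-diagonal block;
    entry i of glob is the global column index of local column i
    PetscCall(ISDestroy(&glob));
    PetscCall(MatDestroy(&A_loc));
*/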
5426 
5427 /*@C
5428   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5429 
5430   Not Collective
5431 
5432   Input Parameters:
5433 + A     - the matrix
5434 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5435 . row   - index set of rows to extract (or `NULL`)
5436 - col   - index set of columns to extract (or `NULL`)
5437 
5438   Output Parameter:
5439 . A_loc - the local sequential matrix generated
5440 
5441   Level: developer
5442 
5443 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5444 @*/
5445 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5446 {
5447   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5448   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5449   IS          isrowa, iscola;
5450   Mat        *aloc;
5451   PetscBool   match;
5452 
5453   PetscFunctionBegin;
5454   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5455   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5456   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5457   if (!row) {
5458     start = A->rmap->rstart;
5459     end   = A->rmap->rend;
5460     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5461   } else {
5462     isrowa = *row;
5463   }
5464   if (!col) {
5465     start = A->cmap->rstart;
5466     cmap  = a->garray;
5467     nzA   = a->A->cmap->n;
5468     nzB   = a->B->cmap->n;
5469     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5470     ncols = 0;
5471     for (i = 0; i < nzB; i++) {
5472       if (cmap[i] < start) idx[ncols++] = cmap[i];
5473       else break;
5474     }
5475     imark = i;
5476     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5477     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5478     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5479   } else {
5480     iscola = *col;
5481   }
5482   if (scall != MAT_INITIAL_MATRIX) {
5483     PetscCall(PetscMalloc1(1, &aloc));
5484     aloc[0] = *A_loc;
5485   }
5486   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5487   if (!col) { /* attach global id of condensed columns */
5488     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5489   }
5490   *A_loc = aloc[0];
5491   PetscCall(PetscFree(aloc));
5492   if (!row) PetscCall(ISDestroy(&isrowa));
5493   if (!col) PetscCall(ISDestroy(&iscola));
5494   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5495   PetscFunctionReturn(PETSC_SUCCESS);
5496 }
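
/*
  Editorial usage sketch (not part of the PETSc sources): extract the local rows of an assembled
  MATMPIAIJ matrix A restricted to its nonzero columns; passing NULL for row and col lets the
  routine select all local rows and the nonzero columns itself.

    Mat A_cond;

    PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_cond));
    ... use A_cond ...
    PetscCall(MatDestroy(&A_cond));
*/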
5497 
5498 /*
5499  * Create a sequential AIJ matrix based on row indices; once a row is matched, all of its columns are extracted.
5500  * Rows may be local or remote. The routine is designed to be scalable in memory so that nothing is based
5501  * on a global size.
5502  * */
5503 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5504 {
5505   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5506   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5507   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5508   PetscMPIInt            owner;
5509   PetscSFNode           *iremote, *oiremote;
5510   const PetscInt        *lrowindices;
5511   PetscSF                sf, osf;
5512   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5513   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5514   MPI_Comm               comm;
5515   ISLocalToGlobalMapping mapping;
5516   const PetscScalar     *pd_a, *po_a;
5517 
5518   PetscFunctionBegin;
5519   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5520   /* plocalsize is the number of roots
5521    * nrows is the number of leaves
5522    * */
5523   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5524   PetscCall(ISGetLocalSize(rows, &nrows));
5525   PetscCall(PetscCalloc1(nrows, &iremote));
5526   PetscCall(ISGetIndices(rows, &lrowindices));
5527   for (i = 0; i < nrows; i++) {
5528     /* Find a remote index and an owner for a row
5529      * The row could be local or remote
5530      * */
5531     owner = 0;
5532     lidx  = 0;
5533     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5534     iremote[i].index = lidx;
5535     iremote[i].rank  = owner;
5536   }
5537   /* Create SF to communicate how many nonzero columns for each row */
5538   PetscCall(PetscSFCreate(comm, &sf));
5539   /* SF will figure out the number of nonzero columns for each row, and their
5540    * offsets
5541    * */
5542   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5543   PetscCall(PetscSFSetFromOptions(sf));
5544   PetscCall(PetscSFSetUp(sf));
5545 
5546   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5547   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5548   PetscCall(PetscCalloc1(nrows, &pnnz));
5549   roffsets[0] = 0;
5550   roffsets[1] = 0;
5551   for (i = 0; i < plocalsize; i++) {
5552     /* diagonal */
5553     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5554     /* off-diagonal */
5555     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5556     /* compute offsets so that we know the relative location of each row */
5557     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5558     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5559   }
5560   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5561   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5562   /* 'r' means root, and 'l' means leaf */
5563   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5564   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5565   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5566   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5567   PetscCall(PetscSFDestroy(&sf));
5568   PetscCall(PetscFree(roffsets));
5569   PetscCall(PetscFree(nrcols));
5570   dntotalcols = 0;
5571   ontotalcols = 0;
5572   ncol        = 0;
5573   for (i = 0; i < nrows; i++) {
5574     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5575     ncol    = PetscMax(pnnz[i], ncol);
5576     /* diagonal */
5577     dntotalcols += nlcols[i * 2 + 0];
5578     /* off-diagonal */
5579     ontotalcols += nlcols[i * 2 + 1];
5580   }
5581   /* We do not need to figure out the right number of columns
5582    * since all the calculations will be done by going through the raw data
5583    * */
5584   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5585   PetscCall(MatSetUp(*P_oth));
5586   PetscCall(PetscFree(pnnz));
5587   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5588   /* diagonal */
5589   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5590   /* off-diagonal */
5591   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5592   /* diagonal */
5593   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5594   /* off-diagonal */
5595   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5596   dntotalcols = 0;
5597   ontotalcols = 0;
5598   ntotalcols  = 0;
5599   for (i = 0; i < nrows; i++) {
5600     owner = 0;
5601     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5602     /* Set iremote for diag matrix */
5603     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5604       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5605       iremote[dntotalcols].rank  = owner;
5606       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5607       ilocal[dntotalcols++] = ntotalcols++;
5608     }
5609     /* off-diagonal */
5610     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5611       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5612       oiremote[ontotalcols].rank  = owner;
5613       oilocal[ontotalcols++]      = ntotalcols++;
5614     }
5615   }
5616   PetscCall(ISRestoreIndices(rows, &lrowindices));
5617   PetscCall(PetscFree(loffsets));
5618   PetscCall(PetscFree(nlcols));
5619   PetscCall(PetscSFCreate(comm, &sf));
5620   /* P serves as the roots and P_oth as the leaves
5621    * Diag matrix
5622    * */
5623   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5624   PetscCall(PetscSFSetFromOptions(sf));
5625   PetscCall(PetscSFSetUp(sf));
5626 
5627   PetscCall(PetscSFCreate(comm, &osf));
5628   /* off-diagonal */
5629   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5630   PetscCall(PetscSFSetFromOptions(osf));
5631   PetscCall(PetscSFSetUp(osf));
5632   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5633   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5634   /* operate on the matrix internal data to save memory */
5635   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5636   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5637   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5638   /* Convert to global indices for diag matrix */
5639   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5640   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5641   /* We want P_oth to store global indices */
5642   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5643   /* Use memory scalable approach */
5644   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5645   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5646   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5647   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5648   /* Convert back to local indices */
5649   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5650   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5651   nout = 0;
5652   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5653   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5654   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5655   /* Exchange values */
5656   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5657   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5658   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5659   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5660   /* Stop PETSc from shrinking memory */
5661   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5662   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5663   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5664   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5665   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5666   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5667   PetscCall(PetscSFDestroy(&sf));
5668   PetscCall(PetscSFDestroy(&osf));
5669   PetscFunctionReturn(PETSC_SUCCESS);
5670 }
5671 
5672 /*
5673  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
5674  * This supports MPIAIJ and MAIJ
5675  * */
5676 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5677 {
5678   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5679   Mat_SeqAIJ *p_oth;
5680   IS          rows, map;
5681   PetscHMapI  hamp;
5682   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5683   MPI_Comm    comm;
5684   PetscSF     sf, osf;
5685   PetscBool   has;
5686 
5687   PetscFunctionBegin;
5688   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5689   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5690   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5691    *  and then create a submatrix (that often is an overlapping matrix)
5692    * */
5693   if (reuse == MAT_INITIAL_MATRIX) {
5694     /* Use a hash table to figure out unique keys */
5695     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5696     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5697     count = 0;
5698     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5699     for (i = 0; i < a->B->cmap->n; i++) {
5700       key = a->garray[i] / dof;
5701       PetscCall(PetscHMapIHas(hamp, key, &has));
5702       if (!has) {
5703         mapping[i] = count;
5704         PetscCall(PetscHMapISet(hamp, key, count++));
5705       } else {
5706         /* The current 'i' has the same key as the previous one */
5707         mapping[i] = count - 1;
5708       }
5709     }
5710     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5711     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5712     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5713     PetscCall(PetscCalloc1(htsize, &rowindices));
5714     off = 0;
5715     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5716     PetscCall(PetscHMapIDestroy(&hamp));
5717     PetscCall(PetscSortInt(htsize, rowindices));
5718     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5719     /* In case the matrix was already created and the user wants to recreate it */
5720     PetscCall(MatDestroy(P_oth));
5721     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5722     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5723     PetscCall(ISDestroy(&map));
5724     PetscCall(ISDestroy(&rows));
5725   } else if (reuse == MAT_REUSE_MATRIX) {
5726     /* If the matrix was already created, we simply update values using the SF objects
5727      * that were attached to the matrix earlier.
5728      */
5729     const PetscScalar *pd_a, *po_a;
5730 
5731     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5732     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5733     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5734     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5735     /* Update values in place */
5736     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5737     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5738     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5739     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5740     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5741     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5742     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5743     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5744   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5745   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5746   PetscFunctionReturn(PETSC_SUCCESS);
5747 }
5748 
5749 /*@C
5750   MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` corresponding to nonzero columns of local `A`
5751 
5752   Collective
5753 
5754   Input Parameters:
5755 + A     - the first matrix in `MATMPIAIJ` format
5756 . B     - the second matrix in `MATMPIAIJ` format
5757 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5758 
5759   Output Parameters:
5760 + rowb  - on input, the index set of rows of B to extract (or `NULL`); modified on output
5761 . colb  - on input, the index set of columns of B to extract (or `NULL`); modified on output
5762 - B_seq - the sequential matrix generated
5763 
5764   Level: developer
5765 
5766 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5767 @*/
5768 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5769 {
5770   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5771   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5772   IS          isrowb, iscolb;
5773   Mat        *bseq = NULL;
5774 
5775   PetscFunctionBegin;
5776   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5777              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5778   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5779 
5780   if (scall == MAT_INITIAL_MATRIX) {
5781     start = A->cmap->rstart;
5782     cmap  = a->garray;
5783     nzA   = a->A->cmap->n;
5784     nzB   = a->B->cmap->n;
5785     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5786     ncols = 0;
5787     for (i = 0; i < nzB; i++) { /* row < local row index */
5788       if (cmap[i] < start) idx[ncols++] = cmap[i];
5789       else break;
5790     }
5791     imark = i;
5792     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5793     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5794     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5795     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5796   } else {
5797     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5798     isrowb = *rowb;
5799     iscolb = *colb;
5800     PetscCall(PetscMalloc1(1, &bseq));
5801     bseq[0] = *B_seq;
5802   }
5803   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5804   *B_seq = bseq[0];
5805   PetscCall(PetscFree(bseq));
5806   if (!rowb) {
5807     PetscCall(ISDestroy(&isrowb));
5808   } else {
5809     *rowb = isrowb;
5810   }
5811   if (!colb) {
5812     PetscCall(ISDestroy(&iscolb));
5813   } else {
5814     *colb = iscolb;
5815   }
5816   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5817   PetscFunctionReturn(PETSC_SUCCESS);
5818 }
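
/*
  Editorial usage sketch (not part of the PETSc sources): gather the rows of B needed by the local
  part of A into a sequential matrix, keeping the index sets so the gather can be repeated with
  MAT_REUSE_MATRIX after B's values change but its pattern does not.

    IS  rowb = NULL, colb = NULL;
    Mat B_seq = NULL;

    PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
    ...
    PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
    PetscCall(ISDestroy(&rowb));
    PetscCall(ISDestroy(&colb));
    PetscCall(MatDestroy(&B_seq));
*/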
5819 
5820 /*
5821     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5822     of the OFF-DIAGONAL portion of local A
5823 
5824     Collective
5825 
5826    Input Parameters:
5827 +    A,B - the matrices in `MATMPIAIJ` format
5828 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5829 
5830    Output Parameters:
5831 +    startsj_s - starting points in B's sending j-arrays, saved for `MAT_REUSE_MATRIX` (or NULL)
5832 .    startsj_r - starting points in B's receiving j-arrays, saved for `MAT_REUSE_MATRIX` (or NULL)
5833 .    bufa_ptr - array for sending matrix values, saved for `MAT_REUSE_MATRIX` (or NULL)
5834 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5835 
5836     Developer Note:
5837     This directly accesses information inside the VecScatter associated with the matrix-vector product
5838      for this matrix. This is not desirable.
5839 
5840     Level: developer
5841 
5842 */
5843 
5844 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5845 {
5846   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5847   VecScatter         ctx;
5848   MPI_Comm           comm;
5849   const PetscMPIInt *rprocs, *sprocs;
5850   PetscMPIInt        nrecvs, nsends;
5851   const PetscInt    *srow, *rstarts, *sstarts;
5852   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5853   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5854   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5855   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5856   PetscMPIInt        size, tag, rank, nreqs;
5857 
5858   PetscFunctionBegin;
5859   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5860   PetscCallMPI(MPI_Comm_size(comm, &size));
5861 
5862   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5863              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5864   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5865   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5866 
5867   if (size == 1) {
5868     startsj_s = NULL;
5869     bufa_ptr  = NULL;
5870     *B_oth    = NULL;
5871     PetscFunctionReturn(PETSC_SUCCESS);
5872   }
5873 
5874   ctx = a->Mvctx;
5875   tag = ((PetscObject)ctx)->tag;
5876 
5877   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5878   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5879   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5880   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5881   PetscCall(PetscMalloc1(nreqs, &reqs));
5882   rwaits = reqs;
5883   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5884 
5885   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5886   if (scall == MAT_INITIAL_MATRIX) {
5887     /* i-array */
5888     /*  post receives */
5889     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5890     for (i = 0; i < nrecvs; i++) {
5891       rowlen = rvalues + rstarts[i] * rbs;
5892       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5893       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5894     }
5895 
5896     /* pack the outgoing message */
5897     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5898 
5899     sstartsj[0] = 0;
5900     rstartsj[0] = 0;
5901     len         = 0; /* total length of j or a array to be sent */
5902     if (nsends) {
5903       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5904       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5905     }
5906     for (i = 0; i < nsends; i++) {
5907       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5908       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5909       for (j = 0; j < nrows; j++) {
5910         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5911         for (l = 0; l < sbs; l++) {
5912           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5913 
5914           rowlen[j * sbs + l] = ncols;
5915 
5916           len += ncols;
5917           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5918         }
5919         k++;
5920       }
5921       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5922 
5923       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5924     }
5925     /* recvs and sends of i-array are completed */
5926     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5927     PetscCall(PetscFree(svalues));
5928 
5929     /* allocate buffers for sending j and a arrays */
5930     PetscCall(PetscMalloc1(len + 1, &bufj));
5931     PetscCall(PetscMalloc1(len + 1, &bufa));
5932 
5933     /* create i-array of B_oth */
5934     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5935 
5936     b_othi[0] = 0;
5937     len       = 0; /* total length of j or a array to be received */
5938     k         = 0;
5939     for (i = 0; i < nrecvs; i++) {
5940       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5941       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5942       for (j = 0; j < nrows; j++) {
5943         b_othi[k + 1] = b_othi[k] + rowlen[j];
5944         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5945         k++;
5946       }
5947       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5948     }
5949     PetscCall(PetscFree(rvalues));
5950 
5951     /* allocate space for j and a arrays of B_oth */
5952     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5953     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5954 
5955     /* j-array */
5956     /*  post receives of j-array */
5957     for (i = 0; i < nrecvs; i++) {
5958       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5959       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5960     }
5961 
5962     /* pack the outgoing message j-array */
5963     if (nsends) k = sstarts[0];
5964     for (i = 0; i < nsends; i++) {
5965       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5966       bufJ  = bufj + sstartsj[i];
5967       for (j = 0; j < nrows; j++) {
5968         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5969         for (ll = 0; ll < sbs; ll++) {
5970           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5971           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5972           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5973         }
5974       }
5975       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5976     }
5977 
5978     /* recvs and sends of j-array are completed */
5979     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5980   } else if (scall == MAT_REUSE_MATRIX) {
5981     sstartsj = *startsj_s;
5982     rstartsj = *startsj_r;
5983     bufa     = *bufa_ptr;
5984     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5985   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5986 
5987   /* a-array */
5988   /*  post receives of a-array */
5989   for (i = 0; i < nrecvs; i++) {
5990     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5991     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5992   }
5993 
5994   /* pack the outgoing message a-array */
5995   if (nsends) k = sstarts[0];
5996   for (i = 0; i < nsends; i++) {
5997     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5998     bufA  = bufa + sstartsj[i];
5999     for (j = 0; j < nrows; j++) {
6000       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6001       for (ll = 0; ll < sbs; ll++) {
6002         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6003         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6004         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6005       }
6006     }
6007     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6008   }
6009   /* recvs and sends of a-array are completed */
6010   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6011   PetscCall(PetscFree(reqs));
6012 
6013   if (scall == MAT_INITIAL_MATRIX) {
6014     Mat_SeqAIJ *b_oth;
6015 
6016     /* put together the new matrix */
6017     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6018 
6019     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6020     /* Since these are PETSc arrays, change flags to free them as necessary. */
6021     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6022     b_oth->free_a  = PETSC_TRUE;
6023     b_oth->free_ij = PETSC_TRUE;
6024     b_oth->nonew   = 0;
6025 
6026     PetscCall(PetscFree(bufj));
6027     if (!startsj_s || !bufa_ptr) {
6028       PetscCall(PetscFree2(sstartsj, rstartsj));
6029       PetscCall(PetscFree(bufa_ptr));
6030     } else {
6031       *startsj_s = sstartsj;
6032       *startsj_r = rstartsj;
6033       *bufa_ptr  = bufa;
6034     }
6035   } else if (scall == MAT_REUSE_MATRIX) {
6036     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6037   }
6038 
6039   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6040   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6041   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6042   PetscFunctionReturn(PETSC_SUCCESS);
6043 }
6044 
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6048 #if defined(PETSC_HAVE_MKL_SPARSE)
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6050 #endif
6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6053 #if defined(PETSC_HAVE_ELEMENTAL)
6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6055 #endif
6056 #if defined(PETSC_HAVE_SCALAPACK)
6057 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6058 #endif
6059 #if defined(PETSC_HAVE_HYPRE)
6060 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6061 #endif
6062 #if defined(PETSC_HAVE_CUDA)
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6064 #endif
6065 #if defined(PETSC_HAVE_HIP)
6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6067 #endif
6068 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6070 #endif
6071 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6072 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6073 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6074 
6075 /*
6076     Computes (B'*A')' since computing B*A directly is untenable
6077 
6078                n                       p                          p
6079         [             ]       [             ]         [                 ]
6080       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6081         [             ]       [             ]         [                 ]
6082 
6083 */
6084 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6085 {
6086   Mat At, Bt, Ct;
6087 
6088   PetscFunctionBegin;
6089   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6090   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6091   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6092   PetscCall(MatDestroy(&At));
6093   PetscCall(MatDestroy(&Bt));
6094   PetscCall(MatTransposeSetPrecursor(Ct, C));
6095   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6096   PetscCall(MatDestroy(&Ct));
6097   PetscFunctionReturn(PETSC_SUCCESS);
6098 }
6099 
6100 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6101 {
6102   PetscBool cisdense;
6103 
6104   PetscFunctionBegin;
6105   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6106   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6107   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6108   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6109   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6110   PetscCall(MatSetUp(C));
6111 
6112   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6113   PetscFunctionReturn(PETSC_SUCCESS);
6114 }
6115 
6116 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6117 {
6118   Mat_Product *product = C->product;
6119   Mat          A = product->A, B = product->B;
6120 
6121   PetscFunctionBegin;
6122   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6123              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6124   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6125   C->ops->productsymbolic = MatProductSymbolic_AB;
6126   PetscFunctionReturn(PETSC_SUCCESS);
6127 }
6128 
6129 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6130 {
6131   Mat_Product *product = C->product;
6132 
6133   PetscFunctionBegin;
6134   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6135   PetscFunctionReturn(PETSC_SUCCESS);
6136 }
6137 
6138 /*
6139    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6140 
6141   Input Parameters:
6142 
6143     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6144     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6145 
6146     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6147 
6148     For Set1, j1[] contains column indices of the nonzeros.
6149     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6150     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6151     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6152 
6153     Similar for Set2.
6154 
6155     This routine merges the two sets of nonzeros row by row and removes repeats.
6156 
6157   Output Parameters: (memory is allocated by the caller)
6158 
6159     i[],j[]: the CSR of the merged matrix, which has m rows.
6160     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6161     imap2[]: similar to imap1[], but for Set2.
6162     Note we order nonzeros row-by-row and from left to right.
6163 */
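
/*
  Worked example (editorial addition, values illustrative): for a single row r with
    Set1: j1 = {0, 0, 3} over [rowBegin1[r], rowEnd1[r]), so jmap1 = {0, 2, 3} (col 0 repeated twice)
    Set2: j2 = {1, 3}    over [rowBegin2[r], rowEnd2[r]), so jmap2 = {0, 1, 2}
  the merged row is j = {0, 1, 3} with i[r+1] - i[r] = 3, and
    imap1 = {0, 2}   (Set1's unique cols 0 and 3 land at merged positions 0 and 2)
    imap2 = {1, 2}   (Set2's unique cols 1 and 3 land at merged positions 1 and 2)
*/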
6164 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6165 {
6166   PetscInt   r, m; /* Row index of mat */
6167   PetscCount t, t1, t2, b1, e1, b2, e2;
6168 
6169   PetscFunctionBegin;
6170   PetscCall(MatGetLocalSize(mat, &m, NULL));
6171   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
6172   i[0]        = 0;
6173   for (r = 0; r < m; r++) { /* Do row by row merging */
6174     b1 = rowBegin1[r];
6175     e1 = rowEnd1[r];
6176     b2 = rowBegin2[r];
6177     e2 = rowEnd2[r];
6178     while (b1 < e1 && b2 < e2) {
6179       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6180         j[t]      = j1[b1];
6181         imap1[t1] = t;
6182         imap2[t2] = t;
6183         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero of Set1 */
6184         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero of Set2 */
6185         t1++;
6186         t2++;
6187         t++;
6188       } else if (j1[b1] < j2[b2]) {
6189         j[t]      = j1[b1];
6190         imap1[t1] = t;
6191         b1 += jmap1[t1 + 1] - jmap1[t1];
6192         t1++;
6193         t++;
6194       } else {
6195         j[t]      = j2[b2];
6196         imap2[t2] = t;
6197         b2 += jmap2[t2 + 1] - jmap2[t2];
6198         t2++;
6199         t++;
6200       }
6201     }
6202     /* Merge the remaining in either j1[] or j2[] */
6203     while (b1 < e1) {
6204       j[t]      = j1[b1];
6205       imap1[t1] = t;
6206       b1 += jmap1[t1 + 1] - jmap1[t1];
6207       t1++;
6208       t++;
6209     }
6210     while (b2 < e2) {
6211       j[t]      = j2[b2];
6212       imap2[t2] = t;
6213       b2 += jmap2[t2 + 1] - jmap2[t2];
6214       t2++;
6215       t++;
6216     }
6217     PetscCall(PetscIntCast(t, i + r + 1));
6218   }
6219   PetscFunctionReturn(PETSC_SUCCESS);
6220 }
6221 
6222 /*
6223   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6224 
6225   Input Parameters:
6226     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6227     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6228       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6229 
6230       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6231       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6232 
6233   Output Parameters:
6234     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6235     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6236       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6237       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6238 
6239     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6240       Atot: number of entries belonging to the diagonal block.
6241       Annz: number of unique nonzeros belonging to the diagonal block.
6242       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6243         repeats (i.e., same 'i,j' pair).
6244       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6245         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6249 
6250     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6251 
6252     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6253 */
6254 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6255 {
6256   PetscInt    cstart, cend, rstart, rend, row, col;
6257   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6258   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6259   PetscCount  k, m, p, q, r, s, mid;
6260   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6261 
6262   PetscFunctionBegin;
6263   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6264   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6265   m = rend - rstart;
6266 
6267   /* Skip negative rows */
6268   for (k = 0; k < n; k++)
6269     if (i[k] >= 0) break;
6270 
6271   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6272      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6273   */
6274   while (k < n) {
6275     row = i[k];
6276     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6277     for (s = k; s < n; s++)
6278       if (i[s] != row) break;
6279 
6280     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6281     for (p = k; p < s; p++) {
6282       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6283       else PetscAssert((j[p] >= 0) && (j[p] < mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6284     }
6285     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6286     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6287     rowBegin[row - rstart] = k;
6288     rowMid[row - rstart]   = mid;
6289     rowEnd[row - rstart]   = s;
6290 
6291     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6292     Atot += mid - k;
6293     Btot += s - mid;
6294 
6295     /* Count unique nonzeros of this diag row */
6296     for (p = k; p < mid;) {
6297       col = j[p];
6298       do {
6299         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6300         p++;
6301       } while (p < mid && j[p] == col);
6302       Annz++;
6303     }
6304 
6305     /* Count unique nonzeros of this offdiag row */
6306     for (p = mid; p < s;) {
6307       col = j[p];
6308       do {
6309         p++;
6310       } while (p < s && j[p] == col);
6311       Bnnz++;
6312     }
6313     k = s;
6314   }
6315 
6316   /* Allocation according to Atot, Btot, Annz, Bnnz */
6317   PetscCall(PetscMalloc1(Atot, &Aperm));
6318   PetscCall(PetscMalloc1(Btot, &Bperm));
6319   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6320   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6321 
6322   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6323   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6324   for (r = 0; r < m; r++) {
6325     k   = rowBegin[r];
6326     mid = rowMid[r];
6327     s   = rowEnd[r];
6328     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6329     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6330     Atot += mid - k;
6331     Btot += s - mid;
6332 
6333     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6334     for (p = k; p < mid;) {
6335       col = j[p];
6336       q   = p;
6337       do {
6338         p++;
6339       } while (p < mid && j[p] == col);
6340       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6341       Annz++;
6342     }
6343 
6344     for (p = mid; p < s;) {
6345       col = j[p];
6346       q   = p;
6347       do {
6348         p++;
6349       } while (p < s && j[p] == col);
6350       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6351       Bnnz++;
6352     }
6353   }
6354   /* Output */
6355   *Aperm_ = Aperm;
6356   *Annz_  = Annz;
6357   *Atot_  = Atot;
6358   *Ajmap_ = Ajmap;
6359   *Bperm_ = Bperm;
6360   *Bnnz_  = Bnnz;
6361   *Btot_  = Btot;
6362   *Bjmap_ = Bjmap;
6363   PetscFunctionReturn(PETSC_SUCCESS);
6364 }
6365 
6366 /*
6367   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6368 
6369   Input Parameters:
6370     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6371     nnz:  number of unique nonzeros in the merged matrix
6372     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6373     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6374 
6375   Output Parameter: (memory is allocated by the caller)
6376     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6377 
6378   Example:
6379     nnz1 = 4
6380     nnz  = 6
6381     imap = [1,3,4,5]
6382     jmap = [0,3,5,6,7]
6383    then,
6384     jmap_new = [0,0,3,3,5,6,7]
6385 */
6386 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6387 {
6388   PetscCount k, p;
6389 
6390   PetscFunctionBegin;
6391   jmap_new[0] = 0;
6392   p           = nnz;                /* p loops over jmap_new[] backwards */
6393   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6394     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6395   }
6396   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6397   PetscFunctionReturn(PETSC_SUCCESS);
6398 }
6399 
6400 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6401 {
6402   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6403 
6404   PetscFunctionBegin;
6405   PetscCall(PetscSFDestroy(&coo->sf));
6406   PetscCall(PetscFree(coo->Aperm1));
6407   PetscCall(PetscFree(coo->Bperm1));
6408   PetscCall(PetscFree(coo->Ajmap1));
6409   PetscCall(PetscFree(coo->Bjmap1));
6410   PetscCall(PetscFree(coo->Aimap2));
6411   PetscCall(PetscFree(coo->Bimap2));
6412   PetscCall(PetscFree(coo->Aperm2));
6413   PetscCall(PetscFree(coo->Bperm2));
6414   PetscCall(PetscFree(coo->Ajmap2));
6415   PetscCall(PetscFree(coo->Bjmap2));
6416   PetscCall(PetscFree(coo->Cperm1));
6417   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6418   PetscCall(PetscFree(coo));
6419   PetscFunctionReturn(PETSC_SUCCESS);
6420 }
6421 
6422 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6423 {
6424   MPI_Comm             comm;
6425   PetscMPIInt          rank, size;
6426   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6427   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6428   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6429   PetscContainer       container;
6430   MatCOOStruct_MPIAIJ *coo;
6431 
6432   PetscFunctionBegin;
6433   PetscCall(PetscFree(mpiaij->garray));
6434   PetscCall(VecDestroy(&mpiaij->lvec));
6435 #if defined(PETSC_USE_CTABLE)
6436   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6437 #else
6438   PetscCall(PetscFree(mpiaij->colmap));
6439 #endif
6440   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6441   mat->assembled     = PETSC_FALSE;
6442   mat->was_assembled = PETSC_FALSE;
6443 
6444   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6445   PetscCallMPI(MPI_Comm_size(comm, &size));
6446   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6447   PetscCall(PetscLayoutSetUp(mat->rmap));
6448   PetscCall(PetscLayoutSetUp(mat->cmap));
6449   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6450   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6451   PetscCall(MatGetLocalSize(mat, &m, &n));
6452   PetscCall(MatGetSize(mat, &M, &N));
6453 
6454   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6455   /* entries come first, then local rows, then remote rows.                     */
6456   PetscCount n1 = coo_n, *perm1;
6457   PetscInt  *i1 = coo_i, *j1 = coo_j;
6458 
6459   PetscCall(PetscMalloc1(n1, &perm1));
6460   for (k = 0; k < n1; k++) perm1[k] = k;
6461 
6462   /* Manipulate indices so that entries with negative row or col indices get the smallest
6463      row indices, local entries get larger but still negative row indices, and remote entries
6464      keep their positive row indices.
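     For example (illustrative values): with rstart = 100 and rend = 200, an entry with a negative row or column index gets row PETSC_INT_MIN,
     an entry in local row 150 gets row 150 - PETSC_INT_MAX (negative, but greater than PETSC_INT_MIN), and an entry in remote row 250 keeps
     row 250; sorting by row then places ignored entries first, local rows next, and remote rows last.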
6465   */
6466   for (k = 0; k < n1; k++) {
6467     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6468     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6469     else {
6470       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but there are entries for remote rows");
6471       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6472     }
6473   }
6474 
6475   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6476   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6477 
6478   /* Advance k to the first entry we need to take care of */
6479   for (k = 0; k < n1; k++)
6480     if (i1[k] > PETSC_INT_MIN) break;
6481   PetscCount i1start = k;
6482 
6483   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6484   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */
6485 
6486   /*           Send remote rows to their owner                                  */
6487   /* Find which rows should be sent to which remote ranks */
6488   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6489   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6490   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6491   const PetscInt *ranges;
6492   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6493 
6494   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6495   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6496   for (k = rem; k < n1;) {
6497     PetscMPIInt owner;
6498     PetscInt    firstRow, lastRow;
6499 
6500     /* Locate a row range */
6501     firstRow = i1[k]; /* first row of this owner */
6502     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6503     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6504 
6505     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6506     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6507 
6508     /* All entries in [k,p) belong to this remote owner */
6509     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6510       PetscMPIInt *sendto2;
6511       PetscInt    *nentries2;
6512       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6513 
6514       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6515       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6516       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6517       PetscCall(PetscFree2(sendto, nentries));
6518       sendto   = sendto2;
6519       nentries = nentries2;
6520       maxNsend = maxNsend2;
6521     }
6522     sendto[nsend] = owner;
6523     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6524     nsend++;
6525     k = p;
6526   }
6527 
6528   /* Build 1st SF to know offsets on remote to send data */
6529   PetscSF      sf1;
6530   PetscInt     nroots = 1, nroots2 = 0;
6531   PetscInt     nleaves = nsend, nleaves2 = 0;
6532   PetscInt    *offsets;
6533   PetscSFNode *iremote;
6534 
6535   PetscCall(PetscSFCreate(comm, &sf1));
6536   PetscCall(PetscMalloc1(nsend, &iremote));
6537   PetscCall(PetscMalloc1(nsend, &offsets));
6538   for (k = 0; k < nsend; k++) {
6539     iremote[k].rank  = sendto[k];
6540     iremote[k].index = 0;
6541     nleaves2 += nentries[k];
6542     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6543   }
6544   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6545   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6546   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we catch it via the offsets[] check below */
6547   PetscCall(PetscSFDestroy(&sf1));
6548   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
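  /* A small illustration (made-up numbers): if this rank sends 3 entries to rank 2 and 5 entries to rank 7, then nentries = {3, 5};
     after the fetch-and-op, offsets[] holds this rank's starting slots in the receive buffers of ranks 2 and 7 (e.g., {4, 0} if other
     senders already reserved 4 slots on rank 2 and none on rank 7), and nroots2 is the total number of entries this rank itself will receive. */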
6549 
6550   /* Build 2nd SF to send remote COOs to their owner */
6551   PetscSF sf2;
6552   nroots  = nroots2;
6553   nleaves = nleaves2;
6554   PetscCall(PetscSFCreate(comm, &sf2));
6555   PetscCall(PetscSFSetFromOptions(sf2));
6556   PetscCall(PetscMalloc1(nleaves, &iremote));
6557   p = 0;
6558   for (k = 0; k < nsend; k++) {
6559     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6560     for (q = 0; q < nentries[k]; q++, p++) {
6561       iremote[p].rank = sendto[k];
6562       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6563     }
6564   }
6565   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6566 
6567   /* Send the remote COOs to their owner */
6568   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6569   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6570   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6571   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6572   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6573   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6574   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6575   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6576   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6577   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6578   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6579 
6580   PetscCall(PetscFree(offsets));
6581   PetscCall(PetscFree2(sendto, nentries));
6582 
6583   /* Sort received COOs by row along with the permutation array     */
6584   for (k = 0; k < n2; k++) perm2[k] = k;
6585   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6586 
6587   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6588   PetscCount *Cperm1;
6589   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6590   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6591   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6592   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6593 
6594   /* Support for HYPRE matrices, kind of a hack.
6595      Swap min column with diagonal so that diagonal values will go first */
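  /* A small illustration (made-up indices): with rstart = cstart = 10, suppose the COO input has an entry for row 15 at the diagonal
     column 15 and another at column 12, the row's minimum diagonal-block column. The two column indices are swapped, so the diagonal
     value is relabeled with column 12 and therefore ends up first in that row of the assembled diagonal block. */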
6596   PetscBool hypre;
6597   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6598   if (hypre) {
6599     PetscInt *minj;
6600     PetscBT   hasdiag;
6601 
6602     PetscCall(PetscBTCreate(m, &hasdiag));
6603     PetscCall(PetscMalloc1(m, &minj));
6604     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6605     for (k = i1start; k < rem; k++) {
6606       if (j1[k] < cstart || j1[k] >= cend) continue;
6607       const PetscInt rindex = i1[k] - rstart;
6608       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6609       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6610     }
6611     for (k = 0; k < n2; k++) {
6612       if (j2[k] < cstart || j2[k] >= cend) continue;
6613       const PetscInt rindex = i2[k] - rstart;
6614       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6615       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6616     }
6617     for (k = i1start; k < rem; k++) {
6618       const PetscInt rindex = i1[k] - rstart;
6619       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6620       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6621       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6622     }
6623     for (k = 0; k < n2; k++) {
6624       const PetscInt rindex = i2[k] - rstart;
6625       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6626       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6627       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6628     }
6629     PetscCall(PetscBTDestroy(&hasdiag));
6630     PetscCall(PetscFree(minj));
6631   }
6632 
6633   /* Split local COOs and received COOs into diag/offdiag portions */
6634   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6635   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6636   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6637   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6638   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6639   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6640 
6641   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6642   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6643   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6644   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6645 
6646   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6647   PetscInt *Ai, *Bi;
6648   PetscInt *Aj, *Bj;
6649 
6650   PetscCall(PetscMalloc1(m + 1, &Ai));
6651   PetscCall(PetscMalloc1(m + 1, &Bi));
6652   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6653   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6654 
6655   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6656   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6657   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6658   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6659   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6660 
6661   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6662   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6663 
6664   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6665   /* expect most nonzeros in A/B to have local contributing entries            */
6666   PetscInt    Annz = Ai[m];
6667   PetscInt    Bnnz = Bi[m];
6668   PetscCount *Ajmap1_new, *Bjmap1_new;
6669 
6670   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6671   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6672 
6673   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6674   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6675 
6676   PetscCall(PetscFree(Aimap1));
6677   PetscCall(PetscFree(Ajmap1));
6678   PetscCall(PetscFree(Bimap1));
6679   PetscCall(PetscFree(Bjmap1));
6680   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6681   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6682   PetscCall(PetscFree(perm1));
6683   PetscCall(PetscFree3(i2, j2, perm2));
6684 
6685   Ajmap1 = Ajmap1_new;
6686   Bjmap1 = Bjmap1_new;
6687 
6688   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6689   if (Annz < Annz1 + Annz2) {
6690     PetscInt *Aj_new;
6691     PetscCall(PetscMalloc1(Annz, &Aj_new));
6692     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6693     PetscCall(PetscFree(Aj));
6694     Aj = Aj_new;
6695   }
6696 
6697   if (Bnnz < Bnnz1 + Bnnz2) {
6698     PetscInt *Bj_new;
6699     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6700     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6701     PetscCall(PetscFree(Bj));
6702     Bj = Bj_new;
6703   }
6704 
6705   /* Create new submatrices for on-process and off-process coupling                  */
6706   PetscScalar     *Aa, *Ba;
6707   MatType          rtype;
6708   Mat_SeqAIJ      *a, *b;
6709   PetscObjectState state;
6710   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero-initialized matrix values */
6711   PetscCall(PetscCalloc1(Bnnz, &Ba));
6712   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6713   if (cstart) {
6714     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6715   }
6716 
6717   PetscCall(MatGetRootType_Private(mat, &rtype));
6718 
6719   MatSeqXAIJGetOptions_Private(mpiaij->A);
6720   PetscCall(MatDestroy(&mpiaij->A));
6721   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6722   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6723   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6724 
6725   MatSeqXAIJGetOptions_Private(mpiaij->B);
6726   PetscCall(MatDestroy(&mpiaij->B));
6727   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6728   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6729   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6730 
6731   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6732   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6733   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6734   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6735 
6736   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6737   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6738   a->free_a  = PETSC_TRUE;
6739   a->free_ij = PETSC_TRUE;
6740   b->free_a  = PETSC_TRUE;
6741   b->free_ij = PETSC_TRUE;
6742   a->maxnz   = a->nz;
6743   b->maxnz   = b->nz;
6744 
6745   /* conversion must happen AFTER multiply setup */
6746   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6747   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6748   PetscCall(VecDestroy(&mpiaij->lvec));
6749   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6750 
6751   // Put the COO struct in a container and then attach that to the matrix
6752   PetscCall(PetscMalloc1(1, &coo));
6753   coo->n       = coo_n;
6754   coo->sf      = sf2;
6755   coo->sendlen = nleaves;
6756   coo->recvlen = nroots;
6757   coo->Annz    = Annz;
6758   coo->Bnnz    = Bnnz;
6759   coo->Annz2   = Annz2;
6760   coo->Bnnz2   = Bnnz2;
6761   coo->Atot1   = Atot1;
6762   coo->Atot2   = Atot2;
6763   coo->Btot1   = Btot1;
6764   coo->Btot2   = Btot2;
6765   coo->Ajmap1  = Ajmap1;
6766   coo->Aperm1  = Aperm1;
6767   coo->Bjmap1  = Bjmap1;
6768   coo->Bperm1  = Bperm1;
6769   coo->Aimap2  = Aimap2;
6770   coo->Ajmap2  = Ajmap2;
6771   coo->Aperm2  = Aperm2;
6772   coo->Bimap2  = Bimap2;
6773   coo->Bjmap2  = Bjmap2;
6774   coo->Bperm2  = Bperm2;
6775   coo->Cperm1  = Cperm1;
6776   // Allocate in preallocation. If not used, it has zero cost on host
6777   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6778   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6779   PetscCall(PetscContainerSetPointer(container, coo));
6780   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6781   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6782   PetscCall(PetscContainerDestroy(&container));
6783   PetscFunctionReturn(PETSC_SUCCESS);
6784 }
6785 
6786 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6787 {
6788   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6789   Mat                  A = mpiaij->A, B = mpiaij->B;
6790   PetscScalar         *Aa, *Ba;
6791   PetscScalar         *sendbuf, *recvbuf;
6792   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6793   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6794   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6795   const PetscCount    *Cperm1;
6796   PetscContainer       container;
6797   MatCOOStruct_MPIAIJ *coo;
6798 
6799   PetscFunctionBegin;
6800   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6801   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6802   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6803   sendbuf = coo->sendbuf;
6804   recvbuf = coo->recvbuf;
6805   Ajmap1  = coo->Ajmap1;
6806   Ajmap2  = coo->Ajmap2;
6807   Aimap2  = coo->Aimap2;
6808   Bjmap1  = coo->Bjmap1;
6809   Bjmap2  = coo->Bjmap2;
6810   Bimap2  = coo->Bimap2;
6811   Aperm1  = coo->Aperm1;
6812   Aperm2  = coo->Aperm2;
6813   Bperm1  = coo->Bperm1;
6814   Bperm2  = coo->Bperm2;
6815   Cperm1  = coo->Cperm1;
6816 
6817   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6818   PetscCall(MatSeqAIJGetArray(B, &Ba));
6819 
6820   /* Pack entries to be sent to remote */
6821   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6822 
6823   /* Send remote entries to their owner and overlap the communication with local computation */
6824   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6825   /* Add local entries to A and B */
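  /* Illustration (made-up numbers): if the i-th unique nonzero of A was contributed by three local COO entries, then
     Ajmap1[i+1] - Ajmap1[i] = 3 and those contributions are v[Aperm1[Ajmap1[i]]], v[Aperm1[Ajmap1[i] + 1]] and v[Aperm1[Ajmap1[i] + 2]];
     their sum is added to Aa[i] (or replaces it when imode is INSERT_VALUES). */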
6826   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6827     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6828     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6829     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6830   }
6831   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6832     PetscScalar sum = 0.0;
6833     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6834     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6835   }
6836   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6837 
6838   /* Add received remote entries to A and B */
6839   for (PetscCount i = 0; i < coo->Annz2; i++) {
6840     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6841   }
6842   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6843     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6844   }
6845   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6846   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6847   PetscFunctionReturn(PETSC_SUCCESS);
6848 }
6849 
6850 /*MC
6851    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6852 
6853    Options Database Keys:
6854 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6855 
6856    Level: beginner
6857 
6858    Notes:
6859    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6860     in this case the values associated with the rows and columns one passes in are set to zero
6861     in the matrix.
6862 
6863     `MatSetOption`(A,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6864     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6865 
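   Example Usage:
   A minimal creation sequence might look as follows (a sketch only; `comm`, `m`, and `n` are assumed to be defined by the caller, and the
   preallocation numbers are placeholders, not recommendations)
.vb
   Mat A;

   MatCreate(comm, &A);
   MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(A, MATMPIAIJ);
   MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL);
.ve
   after which entries can be inserted with `MatSetValues()` and the matrix assembled with `MatAssemblyBegin()`/`MatAssemblyEnd()`.
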
6866 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6867 M*/
6868 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6869 {
6870   Mat_MPIAIJ *b;
6871   PetscMPIInt size;
6872 
6873   PetscFunctionBegin;
6874   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6875 
6876   PetscCall(PetscNew(&b));
6877   B->data       = (void *)b;
6878   B->ops[0]     = MatOps_Values;
6879   B->assembled  = PETSC_FALSE;
6880   B->insertmode = NOT_SET_VALUES;
6881   b->size       = size;
6882 
6883   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6884 
6885   /* build cache for off array entries formed */
6886   /* build cache for off-process entries formed during assembly */
6887 
6888   b->donotstash  = PETSC_FALSE;
6889   b->colmap      = NULL;
6890   b->garray      = NULL;
6891   b->roworiented = PETSC_TRUE;
6892 
6893   /* stuff used for matrix vector multiply */
6894   b->lvec  = NULL;
6895   b->Mvctx = NULL;
6896 
6897   /* stuff for MatGetRow() */
6898   b->rowindices   = NULL;
6899   b->rowvalues    = NULL;
6900   b->getrowactive = PETSC_FALSE;
6901 
6902   /* flexible pointer used in CUSPARSE classes */
6903   b->spptr = NULL;
6904 
6905   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6906   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6907   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6908   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6909   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6910   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6911   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6912   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6913   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6914   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6915   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6916 #if defined(PETSC_HAVE_CUDA)
6917   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6918 #endif
6919 #if defined(PETSC_HAVE_HIP)
6920   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6921 #endif
6922 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6923   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6924 #endif
6925 #if defined(PETSC_HAVE_MKL_SPARSE)
6926   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6927 #endif
6928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6932 #if defined(PETSC_HAVE_ELEMENTAL)
6933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6934 #endif
6935 #if defined(PETSC_HAVE_SCALAPACK)
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6937 #endif
6938   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6940 #if defined(PETSC_HAVE_HYPRE)
6941   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6943 #endif
6944   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6946   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6947   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6948   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6949   PetscFunctionReturn(PETSC_SUCCESS);
6950 }
6951 
6952 /*@
6953   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6954   and "off-diagonal" part of the matrix in CSR format.
6955 
6956   Collective
6957 
6958   Input Parameters:
6959 + comm - MPI communicator
6960 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6961 . n    - This value should be the same as the local size used in creating the
6962          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6963          calculated if `N` is given). For square matrices `n` is almost always `m`.
6964 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6965 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6966 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6967 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6968 . a    - matrix values
6969 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6970 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6971 - oa   - matrix values
6972 
6973   Output Parameter:
6974 . mat - the matrix
6975 
6976   Level: advanced
6977 
6978   Notes:
6979   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6980   must free the arrays once the matrix has been destroyed and not before.
6981 
6982   The `i` and `j` indices are 0 based
6983 
6984   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6985 
6986   This sets local rows and cannot be used to set off-processor values.
6987 
6988   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6989   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6990   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6991   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6992   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6993   communication if it is known that only local entries will be set.
6994 
6995 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
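  Example Usage:
  A small illustration (for orientation only, not a recommendation): for a 4x4 matrix distributed over two ranks, each owning 2 rows and
  2 columns, suppose rank 0 holds A(0,0)=1, A(0,1)=2, A(1,1)=3 in its "diagonal" block and A(0,3)=4, A(1,2)=5 in its "off-diagonal" block.
  Rank 0 then passes
.vb
  i  = {0, 2, 3}   j  = {0, 1, 1}   a  = {1, 2, 3}   (j[] holds local column indices)
  oi = {0, 1, 2}   oj = {3, 2}      oa = {4, 5}      (oj[] holds global column indices)
.ve
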
6996           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6997 @*/
6998 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6999 {
7000   Mat_MPIAIJ *maij;
7001 
7002   PetscFunctionBegin;
7003   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
7004   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
7005   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
7006   PetscCall(MatCreate(comm, mat));
7007   PetscCall(MatSetSizes(*mat, m, n, M, N));
7008   PetscCall(MatSetType(*mat, MATMPIAIJ));
7009   maij = (Mat_MPIAIJ *)(*mat)->data;
7010 
7011   (*mat)->preallocated = PETSC_TRUE;
7012 
7013   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7014   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7015 
7016   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7017   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7018 
7019   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7020   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7021   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7022   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7023   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7024   PetscFunctionReturn(PETSC_SUCCESS);
7025 }
7026 
7027 typedef struct {
7028   Mat       *mp;    /* intermediate products */
7029   PetscBool *mptmp; /* is the intermediate product temporary ? */
7030   PetscInt   cp;    /* number of intermediate products */
7031 
7032   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7033   PetscInt    *startsj_s, *startsj_r;
7034   PetscScalar *bufa;
7035   Mat          P_oth;
7036 
7037   /* may take advantage of merging product->B */
7038   Mat Bloc; /* B-local by merging diag and off-diag */
7039 
7040   /* cusparse does not support splitting the symbolic and numeric phases.
7041      When api_user is true, we don't need to update the numerical values
7042      of the temporary storage */
7043   PetscBool reusesym;
7044 
7045   /* support for COO values insertion */
7046   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7047   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7048   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7049   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7050   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7051   PetscMemType mtype;
7052 
7053   /* customization */
7054   PetscBool abmerge;
7055   PetscBool P_oth_bind;
7056 } MatMatMPIAIJBACKEND;
7057 
7058 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7059 {
7060   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7061   PetscInt             i;
7062 
7063   PetscFunctionBegin;
7064   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7065   PetscCall(PetscFree(mmdata->bufa));
7066   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7067   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7068   PetscCall(MatDestroy(&mmdata->P_oth));
7069   PetscCall(MatDestroy(&mmdata->Bloc));
7070   PetscCall(PetscSFDestroy(&mmdata->sf));
7071   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7072   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7073   PetscCall(PetscFree(mmdata->own[0]));
7074   PetscCall(PetscFree(mmdata->own));
7075   PetscCall(PetscFree(mmdata->off[0]));
7076   PetscCall(PetscFree(mmdata->off));
7077   PetscCall(PetscFree(mmdata));
7078   PetscFunctionReturn(PETSC_SUCCESS);
7079 }
7080 
7081 /* Copy selected n entries with indices in idx[] of A to v[].
7082    If idx is NULL, copy the whole data array of A to v[]
7083  */
7084 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7085 {
7086   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7087 
7088   PetscFunctionBegin;
7089   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7090   if (f) {
7091     PetscCall((*f)(A, n, idx, v));
7092   } else {
7093     const PetscScalar *vv;
7094 
7095     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7096     if (n && idx) {
7097       PetscScalar    *w  = v;
7098       const PetscInt *oi = idx;
7099       PetscInt        j;
7100 
7101       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7102     } else {
7103       PetscCall(PetscArraycpy(v, vv, n));
7104     }
7105     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7106   }
7107   PetscFunctionReturn(PETSC_SUCCESS);
7108 }
7109 
7110 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7111 {
7112   MatMatMPIAIJBACKEND *mmdata;
7113   PetscInt             i, n_d, n_o;
7114 
7115   PetscFunctionBegin;
7116   MatCheckProduct(C, 1);
7117   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7118   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7119   if (!mmdata->reusesym) { /* update temporary matrices */
7120     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7121     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7122   }
7123   mmdata->reusesym = PETSC_FALSE;
7124 
7125   for (i = 0; i < mmdata->cp; i++) {
7126     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7127     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7128   }
7129   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7130     PetscInt noff;
7131 
7132     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7133     if (mmdata->mptmp[i]) continue;
7134     if (noff) {
7135       PetscInt nown;
7136 
7137       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7138       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7139       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7140       n_o += noff;
7141       n_d += nown;
7142     } else {
7143       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7144 
7145       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7146       n_d += mm->nz;
7147     }
7148   }
7149   if (mmdata->hasoffproc) { /* offprocess insertion */
7150     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7151     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7152   }
7153   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7154   PetscFunctionReturn(PETSC_SUCCESS);
7155 }
7156 
7157 /* Support for Pt * A, A * P, or Pt * A * P */
7158 #define MAX_NUMBER_INTERMEDIATE 4
7159 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7160 {
7161   Mat_Product           *product = C->product;
7162   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7163   Mat_MPIAIJ            *a, *p;
7164   MatMatMPIAIJBACKEND   *mmdata;
7165   ISLocalToGlobalMapping P_oth_l2g = NULL;
7166   IS                     glob      = NULL;
7167   const char            *prefix;
7168   char                   pprefix[256];
7169   const PetscInt        *globidx, *P_oth_idx;
7170   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7171   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7172   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7173                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7174                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7175   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
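  /* For instance (illustrative): rmapt[i] = 1 means local row r of mp[i] contributes to global row "base offset + r" of C
     (the base offset presumably being C's first locally owned row), while cmapt[i] = 2 with cmapa[i] = p->garray means
     local column c of mp[i] maps to global column p->garray[c] */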
7176 
7177   MatProductType ptype;
7178   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7179   PetscMPIInt    size;
7180 
7181   PetscFunctionBegin;
7182   MatCheckProduct(C, 1);
7183   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7184   ptype = product->type;
7185   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7186     ptype                                          = MATPRODUCT_AB;
7187     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7188   }
7189   switch (ptype) {
7190   case MATPRODUCT_AB:
7191     A          = product->A;
7192     P          = product->B;
7193     m          = A->rmap->n;
7194     n          = P->cmap->n;
7195     M          = A->rmap->N;
7196     N          = P->cmap->N;
7197     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7198     break;
7199   case MATPRODUCT_AtB:
7200     P          = product->A;
7201     A          = product->B;
7202     m          = P->cmap->n;
7203     n          = A->cmap->n;
7204     M          = P->cmap->N;
7205     N          = A->cmap->N;
7206     hasoffproc = PETSC_TRUE;
7207     break;
7208   case MATPRODUCT_PtAP:
7209     A          = product->A;
7210     P          = product->B;
7211     m          = P->cmap->n;
7212     n          = P->cmap->n;
7213     M          = P->cmap->N;
7214     N          = P->cmap->N;
7215     hasoffproc = PETSC_TRUE;
7216     break;
7217   default:
7218     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7219   }
7220   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7221   if (size == 1) hasoffproc = PETSC_FALSE;
7222 
7223   /* defaults */
7224   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7225     mp[i]    = NULL;
7226     mptmp[i] = PETSC_FALSE;
7227     rmapt[i] = -1;
7228     cmapt[i] = -1;
7229     rmapa[i] = NULL;
7230     cmapa[i] = NULL;
7231   }
7232 
7233   /* customization */
7234   PetscCall(PetscNew(&mmdata));
7235   mmdata->reusesym = product->api_user;
7236   if (ptype == MATPRODUCT_AB) {
7237     if (product->api_user) {
7238       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7239       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7240       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7241       PetscOptionsEnd();
7242     } else {
7243       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7244       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7245       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7246       PetscOptionsEnd();
7247     }
7248   } else if (ptype == MATPRODUCT_PtAP) {
7249     if (product->api_user) {
7250       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7251       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7252       PetscOptionsEnd();
7253     } else {
7254       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7255       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7256       PetscOptionsEnd();
7257     }
7258   }
7259   a = (Mat_MPIAIJ *)A->data;
7260   p = (Mat_MPIAIJ *)P->data;
7261   PetscCall(MatSetSizes(C, m, n, M, N));
7262   PetscCall(PetscLayoutSetUp(C->rmap));
7263   PetscCall(PetscLayoutSetUp(C->cmap));
7264   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7265   PetscCall(MatGetOptionsPrefix(C, &prefix));
7266 
7267   cp = 0;
7268   switch (ptype) {
7269   case MATPRODUCT_AB: /* A * P */
7270     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7271 
7272     /* A_diag * P_local (merged or not) */
7273     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7274       /* P is product->B */
7275       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7276       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7277       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7278       PetscCall(MatProductSetFill(mp[cp], product->fill));
7279       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7280       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7281       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7282       mp[cp]->product->api_user = product->api_user;
7283       PetscCall(MatProductSetFromOptions(mp[cp]));
7284       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7285       PetscCall(ISGetIndices(glob, &globidx));
7286       rmapt[cp] = 1;
7287       cmapt[cp] = 2;
7288       cmapa[cp] = globidx;
7289       mptmp[cp] = PETSC_FALSE;
7290       cp++;
7291     } else { /* A_diag * P_diag and A_diag * P_off */
7292       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7293       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7294       PetscCall(MatProductSetFill(mp[cp], product->fill));
7295       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7296       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7297       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7298       mp[cp]->product->api_user = product->api_user;
7299       PetscCall(MatProductSetFromOptions(mp[cp]));
7300       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7301       rmapt[cp] = 1;
7302       cmapt[cp] = 1;
7303       mptmp[cp] = PETSC_FALSE;
7304       cp++;
7305       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7306       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7307       PetscCall(MatProductSetFill(mp[cp], product->fill));
7308       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7309       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7310       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7311       mp[cp]->product->api_user = product->api_user;
7312       PetscCall(MatProductSetFromOptions(mp[cp]));
7313       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7314       rmapt[cp] = 1;
7315       cmapt[cp] = 2;
7316       cmapa[cp] = p->garray;
7317       mptmp[cp] = PETSC_FALSE;
7318       cp++;
7319     }
7320 
7321     /* A_off * P_other */
7322     if (mmdata->P_oth) {
7323       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7324       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7325       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7326       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7327       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7328       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7329       PetscCall(MatProductSetFill(mp[cp], product->fill));
7330       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7331       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7332       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7333       mp[cp]->product->api_user = product->api_user;
7334       PetscCall(MatProductSetFromOptions(mp[cp]));
7335       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7336       rmapt[cp] = 1;
7337       cmapt[cp] = 2;
7338       cmapa[cp] = P_oth_idx;
7339       mptmp[cp] = PETSC_FALSE;
7340       cp++;
7341     }
7342     break;
7343 
7344   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7345     /* A is product->B */
7346     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7347     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7348       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7349       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7350       PetscCall(MatProductSetFill(mp[cp], product->fill));
7351       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7352       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7353       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7354       mp[cp]->product->api_user = product->api_user;
7355       PetscCall(MatProductSetFromOptions(mp[cp]));
7356       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7357       PetscCall(ISGetIndices(glob, &globidx));
7358       rmapt[cp] = 2;
7359       rmapa[cp] = globidx;
7360       cmapt[cp] = 2;
7361       cmapa[cp] = globidx;
7362       mptmp[cp] = PETSC_FALSE;
7363       cp++;
7364     } else {
7365       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7366       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7367       PetscCall(MatProductSetFill(mp[cp], product->fill));
7368       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7369       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7370       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7371       mp[cp]->product->api_user = product->api_user;
7372       PetscCall(MatProductSetFromOptions(mp[cp]));
7373       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7374       PetscCall(ISGetIndices(glob, &globidx));
7375       rmapt[cp] = 1;
7376       cmapt[cp] = 2;
7377       cmapa[cp] = globidx;
7378       mptmp[cp] = PETSC_FALSE;
7379       cp++;
7380       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7381       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7382       PetscCall(MatProductSetFill(mp[cp], product->fill));
7383       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7384       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7385       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7386       mp[cp]->product->api_user = product->api_user;
7387       PetscCall(MatProductSetFromOptions(mp[cp]));
7388       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7389       rmapt[cp] = 2;
7390       rmapa[cp] = p->garray;
7391       cmapt[cp] = 2;
7392       cmapa[cp] = globidx;
7393       mptmp[cp] = PETSC_FALSE;
7394       cp++;
7395     }
7396     break;
7397   case MATPRODUCT_PtAP:
7398     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7399     /* P is product->B */
7400     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7401     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7402     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7403     PetscCall(MatProductSetFill(mp[cp], product->fill));
7404     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7405     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7406     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7407     mp[cp]->product->api_user = product->api_user;
7408     PetscCall(MatProductSetFromOptions(mp[cp]));
7409     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7410     PetscCall(ISGetIndices(glob, &globidx));
7411     rmapt[cp] = 2;
7412     rmapa[cp] = globidx;
7413     cmapt[cp] = 2;
7414     cmapa[cp] = globidx;
7415     mptmp[cp] = PETSC_FALSE;
7416     cp++;
7417     if (mmdata->P_oth) {
7418       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7419       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7420       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7421       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7422       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7423       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7424       PetscCall(MatProductSetFill(mp[cp], product->fill));
7425       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7426       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7427       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7428       mp[cp]->product->api_user = product->api_user;
7429       PetscCall(MatProductSetFromOptions(mp[cp]));
7430       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7431       mptmp[cp] = PETSC_TRUE;
7432       cp++;
7433       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7434       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7435       PetscCall(MatProductSetFill(mp[cp], product->fill));
7436       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7437       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7438       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7439       mp[cp]->product->api_user = product->api_user;
7440       PetscCall(MatProductSetFromOptions(mp[cp]));
7441       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7442       rmapt[cp] = 2;
7443       rmapa[cp] = globidx;
7444       cmapt[cp] = 2;
7445       cmapa[cp] = P_oth_idx;
7446       mptmp[cp] = PETSC_FALSE;
7447       cp++;
7448     }
7449     break;
7450   default:
7451     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7452   }
7453   /* sanity check */
7454   if (size > 1)
7455     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7456 
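  /* Note on the maps recorded above: rmapt[i]/cmapt[i] encode how the local row/column indices of mp[i] translate
     to global indices of C: 0 means the indices are already global, 1 means contiguous (just add the ownership
     start of C), and 2 means indirect through the index arrays rmapa[i]/cmapa[i] */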
7457   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7458   for (i = 0; i < cp; i++) {
7459     mmdata->mp[i]    = mp[i];
7460     mmdata->mptmp[i] = mptmp[i];
7461   }
7462   mmdata->cp             = cp;
7463   C->product->data       = mmdata;
7464   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7465   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7466 
7467   /* memory type */
7468   mmdata->mtype = PETSC_MEMTYPE_HOST;
7469   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7470   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7471   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7472   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7473   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7474   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7475 
7476   /* prepare coo coordinates for values insertion */
7477 
7478   /* count total nonzeros of those intermediate seqaij Mats
7479     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7480     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be sent to remote procs
7481     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7482   */
7483   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7484     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7485     if (mptmp[cp]) continue;
7486     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7487       const PetscInt *rmap = rmapa[cp];
7488       const PetscInt  mr   = mp[cp]->rmap->n;
7489       const PetscInt  rs   = C->rmap->rstart;
7490       const PetscInt  re   = C->rmap->rend;
7491       const PetscInt *ii   = mm->i;
7492       for (i = 0; i < mr; i++) {
7493         const PetscInt gr = rmap[i];
7494         const PetscInt nz = ii[i + 1] - ii[i];
7495         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7496         else ncoo_oown += nz;                  /* this row is local */
7497       }
7498     } else ncoo_d += mm->nz;
7499   }
7500 
7501   /*
7502     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7503 
7504     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this proc by other procs.
7505 
7506     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
7507 
7508     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs
7509     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7510     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7511 
7512     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7513     E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of local nonzeros, and the remaining part stores the row indices of nonzeros this proc will receive.
7514   */
7515   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7516   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
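  /* Illustrative example (numbers made up): with two non-temporary products where mp[0] contributes 3 offproc
     nonzeros and mp[1] contributes 5, off[0] points to an index array of length ncoo_o = 8, with off[1] = off[0] + 3
     and off[2] = off[1] + 5; own[] is segmented in the same way for the locally inserted nonzeros */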
7517 
7518   /* gather (i,j) of nonzeros inserted by remote procs */
7519   if (hasoffproc) {
7520     PetscSF  msf;
7521     PetscInt ncoo2, *coo_i2, *coo_j2;
7522 
7523     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7524     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7525     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7526 
7527     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7528       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7529       PetscInt   *idxoff = mmdata->off[cp];
7530       PetscInt   *idxown = mmdata->own[cp];
7531       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7532         const PetscInt *rmap = rmapa[cp];
7533         const PetscInt *cmap = cmapa[cp];
7534         const PetscInt *ii   = mm->i;
7535         PetscInt       *coi  = coo_i + ncoo_o;
7536         PetscInt       *coj  = coo_j + ncoo_o;
7537         const PetscInt  mr   = mp[cp]->rmap->n;
7538         const PetscInt  rs   = C->rmap->rstart;
7539         const PetscInt  re   = C->rmap->rend;
7540         const PetscInt  cs   = C->cmap->rstart;
7541         for (i = 0; i < mr; i++) {
7542           const PetscInt *jj = mm->j + ii[i];
7543           const PetscInt  gr = rmap[i];
7544           const PetscInt  nz = ii[i + 1] - ii[i];
7545           if (gr < rs || gr >= re) { /* this is an offproc row */
7546             for (j = ii[i]; j < ii[i + 1]; j++) {
7547               *coi++    = gr;
7548               *idxoff++ = j;
7549             }
7550             if (!cmapt[cp]) { /* already global */
7551               for (j = 0; j < nz; j++) *coj++ = jj[j];
7552             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7553               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7554             } else { /* type-2, local to global for sparse columns */
7555               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7556             }
7557             ncoo_o += nz;
7558           } else { /* this is a local row */
7559             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7560           }
7561         }
7562       }
7563       mmdata->off[cp + 1] = idxoff;
7564       mmdata->own[cp + 1] = idxown;
7565     }
7566 
7567     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7568     PetscInt incoo_o;
7569     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7570     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7571     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7572     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7573     ncoo = ncoo_d + ncoo_oown + ncoo2;
7574     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7575     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7576     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7577     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7578     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7579     PetscCall(PetscFree2(coo_i, coo_j));
7580     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7581     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7582     coo_i = coo_i2;
7583     coo_j = coo_j2;
7584   } else { /* no offproc values insertion */
7585     ncoo = ncoo_d;
7586     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7587 
7588     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7589     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7590     PetscCall(PetscSFSetUp(mmdata->sf));
7591   }
7592   mmdata->hasoffproc = hasoffproc;
7593 
7594   /* gather (i,j) of nonzeros inserted locally */
7595   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7596     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7597     PetscInt       *coi  = coo_i + ncoo_d;
7598     PetscInt       *coj  = coo_j + ncoo_d;
7599     const PetscInt *jj   = mm->j;
7600     const PetscInt *ii   = mm->i;
7601     const PetscInt *cmap = cmapa[cp];
7602     const PetscInt *rmap = rmapa[cp];
7603     const PetscInt  mr   = mp[cp]->rmap->n;
7604     const PetscInt  rs   = C->rmap->rstart;
7605     const PetscInt  re   = C->rmap->rend;
7606     const PetscInt  cs   = C->cmap->rstart;
7607 
7608     if (mptmp[cp]) continue;
7609     if (rmapt[cp] == 1) { /* consecutive rows */
7610       /* fill coo_i */
7611       for (i = 0; i < mr; i++) {
7612         const PetscInt gr = i + rs;
7613         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7614       }
7615       /* fill coo_j */
7616       if (!cmapt[cp]) { /* type-0, already global */
7617         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7618       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7619         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7620       } else {                                            /* type-2, local to global for sparse columns */
7621         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7622       }
7623       ncoo_d += mm->nz;
7624     } else if (rmapt[cp] == 2) { /* sparse rows */
7625       for (i = 0; i < mr; i++) {
7626         const PetscInt *jj = mm->j + ii[i];
7627         const PetscInt  gr = rmap[i];
7628         const PetscInt  nz = ii[i + 1] - ii[i];
7629         if (gr >= rs && gr < re) { /* local rows */
7630           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7631           if (!cmapt[cp]) { /* type-0, already global */
7632             for (j = 0; j < nz; j++) *coj++ = jj[j];
7633           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7634             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7635           } else { /* type-2, local to global for sparse columns */
7636             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7637           }
7638           ncoo_d += nz;
7639         }
7640       }
7641     }
7642   }
7643   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7644   PetscCall(ISDestroy(&glob));
7645   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7646   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7647   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7648   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7649 
7650   /* set block sizes */
7651   A = product->A;
7652   P = product->B;
7653   switch (ptype) {
7654   case MATPRODUCT_PtAP:
7655     if (P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7656     break;
7657   case MATPRODUCT_RARt:
7658     if (P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7659     break;
7660   case MATPRODUCT_ABC:
7661     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7662     break;
7663   case MATPRODUCT_AB:
7664     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7665     break;
7666   case MATPRODUCT_AtB:
7667     if (A->cmap->bs > 1 || P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7668     break;
7669   case MATPRODUCT_ABt:
7670     if (A->rmap->bs > 1 || P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7671     break;
7672   default:
7673     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7674   }
7675 
7676   /* preallocate with COO data */
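  /* the numeric phase (MatProductNumeric_MPIAIJBACKEND, set above) is then expected to fill coo_v (and the send
     buffer coo_w for offproc entries) using off[]/own[] and to insert the values through the COO interface of C */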
7677   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7678   PetscCall(PetscFree2(coo_i, coo_j));
7679   PetscFunctionReturn(PETSC_SUCCESS);
7680 }
7681 
7682 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7683 {
7684   Mat_Product *product = mat->product;
7685 #if defined(PETSC_HAVE_DEVICE)
7686   PetscBool match  = PETSC_FALSE;
7687   PetscBool usecpu = PETSC_FALSE;
7688 #else
7689   PetscBool match = PETSC_TRUE;
7690 #endif
7691 
7692   PetscFunctionBegin;
7693   MatCheckProduct(mat, 1);
7694 #if defined(PETSC_HAVE_DEVICE)
7695   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7696   if (match) { /* we can always fall back to the CPU if requested */
7697     switch (product->type) {
7698     case MATPRODUCT_AB:
7699       if (product->api_user) {
7700         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7701         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7702         PetscOptionsEnd();
7703       } else {
7704         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7705         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7706         PetscOptionsEnd();
7707       }
7708       break;
7709     case MATPRODUCT_AtB:
7710       if (product->api_user) {
7711         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7712         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7713         PetscOptionsEnd();
7714       } else {
7715         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7716         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7717         PetscOptionsEnd();
7718       }
7719       break;
7720     case MATPRODUCT_PtAP:
7721       if (product->api_user) {
7722         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7723         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7724         PetscOptionsEnd();
7725       } else {
7726         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7727         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7728         PetscOptionsEnd();
7729       }
7730       break;
7731     default:
7732       break;
7733     }
7734     match = (PetscBool)!usecpu;
7735   }
7736 #endif
7737   if (match) {
7738     switch (product->type) {
7739     case MATPRODUCT_AB:
7740     case MATPRODUCT_AtB:
7741     case MATPRODUCT_PtAP:
7742       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7743       break;
7744     default:
7745       break;
7746     }
7747   }
7748   /* fallback to MPIAIJ ops */
7749   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7750   PetscFunctionReturn(PETSC_SUCCESS);
7751 }
7752 
7753 /*
7754    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7755 
7756    n - the number of block indices in cc[]
7757    cc - the block indices (must be large enough to contain the indices)
7758 */
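/* Illustrative example: with bs = 2 and a row whose column indices are {0, 1, 4, 5}, the collapsed block indices
   are cc = {0, 2} and *n = 2 */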
7759 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7760 {
7761   PetscInt        cnt = -1, nidx, j;
7762   const PetscInt *idx;
7763 
7764   PetscFunctionBegin;
7765   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7766   if (nidx) {
7767     cnt     = 0;
7768     cc[cnt] = idx[0] / bs;
7769     for (j = 1; j < nidx; j++) {
7770       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7771     }
7772   }
7773   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7774   *n = cnt + 1;
7775   PetscFunctionReturn(PETSC_SUCCESS);
7776 }
7777 
7778 /*
7779     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7780 
7781     ncollapsed - the number of block indices
7782     collapsed - the block indices (must be large enough to contain the indices)
7783 */
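/* Illustrative example: with bs = 2, if row 'start' collapses to block columns {0, 2} and row 'start + 1' collapses
   to {2, 3}, the merged result is {0, 2, 3} with ncollapsed = 3 */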
7784 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7785 {
7786   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7787 
7788   PetscFunctionBegin;
7789   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7790   for (i = start + 1; i < start + bs; i++) {
7791     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7792     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7793     cprevtmp = cprev;
7794     cprev    = merged;
7795     merged   = cprevtmp;
7796   }
7797   *ncollapsed = nprev;
7798   if (collapsed) *collapsed = cprev;
7799   PetscFunctionReturn(PETSC_SUCCESS);
7800 }
7801 
7802 /*
7803  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7804 
7805  Input Parameters:
7806  + Amat - matrix
7807  . symmetrize - make the result symmetric
7808  - scale - scale with diagonal
7809 
7810  Output Parameter:
7811  . a_Gmat - output scalar graph (values >= 0)
7812 
7813 */
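/* A typical call (illustrative only): MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, -1.0, 0, NULL, &G) builds
   a symmetrized, diagonally scaled scalar graph of A with no filtering */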
7814 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7815 {
7816   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7817   MPI_Comm  comm;
7818   Mat       Gmat;
7819   PetscBool ismpiaij, isseqaij;
7820   Mat       a, b, c;
7821   MatType   jtype;
7822 
7823   PetscFunctionBegin;
7824   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7825   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7826   PetscCall(MatGetSize(Amat, &MM, &NN));
7827   PetscCall(MatGetBlockSize(Amat, &bs));
7828   nloc = (Iend - Istart) / bs;
7829 
7830   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7831   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7832   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7833 
7834   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7835   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class can provide a fast
7836      implementation */
7837   if (bs > 1) {
7838     PetscCall(MatGetType(Amat, &jtype));
7839     PetscCall(MatCreate(comm, &Gmat));
7840     PetscCall(MatSetType(Gmat, jtype));
7841     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7842     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
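    /* fast path: assume every bs x bs block of Amat is fully dense; this is verified block row by block row below,
       and we fall back to the general code at old_bs if a sparse block is found */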
7843     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7844       PetscInt  *d_nnz, *o_nnz;
7845       MatScalar *aa, val, *AA;
7846       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7847 
7848       if (isseqaij) {
7849         a = Amat;
7850         b = NULL;
7851       } else {
7852         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7853         a             = d->A;
7854         b             = d->B;
7855       }
7856       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7857       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7858       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7859         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7860         const PetscInt *cols1, *cols2;
7861 
7862         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7863           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7864           nnz[brow / bs] = nc2 / bs;
7865           if (nc2 % bs) ok = 0;
7866           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7867           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7868             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7869             if (nc1 != nc2) ok = 0;
7870             else {
7871               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7872                 if (cols1[jj] != cols2[jj]) ok = 0;
7873                 if (cols1[jj] % bs != jj % bs) ok = 0;
7874               }
7875             }
7876             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7877           }
7878           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7879           if (!ok) {
7880             PetscCall(PetscFree2(d_nnz, o_nnz));
7881             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7882             goto old_bs;
7883           }
7884         }
7885       }
7886       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7887       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7888       PetscCall(PetscFree2(d_nnz, o_nnz));
7889       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7890       // diag
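      // compress each dense bs x bs block of the diagonal part into one scalar: the sum of |Re(.)| over the block
      // (or only over the rows/columns listed in index[] when index_size > 0)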
7891       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7892         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7893 
7894         ai = aseq->i;
7895         n  = ai[brow + 1] - ai[brow];
7896         aj = aseq->j + ai[brow];
7897         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7898           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7899           val        = 0;
7900           if (index_size == 0) {
7901             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7902               aa = aseq->a + ai[brow + ii] + k;
7903               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7904                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7905               }
7906             }
7907           } else {                                            // use (index,index) value if provided
7908             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7909               PetscInt ii = index[iii];
7910               aa          = aseq->a + ai[brow + ii] + k;
7911               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7912                 PetscInt jj = index[jjj];
7913                 val += PetscAbs(PetscRealPart(aa[jj]));
7914               }
7915             }
7916           }
7917           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7918           AA[k / bs] = val;
7919         }
7920         grow = Istart / bs + brow / bs;
7921         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7922       }
7923       // off-diag
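      // same block compression for the off-diagonal (off-process) part, mapping local columns to global block
      // columns through garray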
7924       if (ismpiaij) {
7925         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7926         const PetscScalar *vals;
7927         const PetscInt    *cols, *garray = aij->garray;
7928 
7929         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7930         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7931           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7932           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7933             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7934             AA[k / bs] = 0;
7935             AJ[cidx]   = garray[cols[k]] / bs;
7936           }
7937           nc = ncols / bs;
7938           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7939           if (index_size == 0) {
7940             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7941               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7942               for (PetscInt k = 0; k < ncols; k += bs) {
7943                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7944                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7945                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7946                 }
7947               }
7948               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7949             }
7950           } else {                                            // use (index,index) value if provided
7951             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7952               PetscInt ii = index[iii];
7953               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7954               for (PetscInt k = 0; k < ncols; k += bs) {
7955                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7956                   PetscInt jj = index[jjj];
7957                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7958                 }
7959               }
7960               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7961             }
7962           }
7963           grow = Istart / bs + brow / bs;
7964           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7965         }
7966       }
7967       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7968       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7969       PetscCall(PetscFree2(AA, AJ));
7970     } else {
7971       const PetscScalar *vals;
7972       const PetscInt    *idx;
7973       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7974     old_bs:
7975       /*
7976        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7977        */
7978       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7979       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7980       if (isseqaij) {
7981         PetscInt max_d_nnz;
7982 
7983         /*
7984          Determine exact preallocation count for (sequential) scalar matrix
7985          */
7986         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7987         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7988         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7989         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7990         PetscCall(PetscFree3(w0, w1, w2));
7991       } else if (ismpiaij) {
7992         Mat             Daij, Oaij;
7993         const PetscInt *garray;
7994         PetscInt        max_d_nnz;
7995 
7996         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7997         /*
7998          Determine exact preallocation count for diagonal block portion of scalar matrix
7999          */
8000         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
8001         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
8002         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
8003         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
8004         PetscCall(PetscFree3(w0, w1, w2));
8005         /*
8006          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
8007          */
8008         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
8009           o_nnz[jj] = 0;
8010           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
8011             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8012             o_nnz[jj] += ncols;
8013             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8014           }
8015           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
8016         }
8017       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
8018       /* get scalar copy (norms) of matrix */
8019       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8020       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8021       PetscCall(PetscFree2(d_nnz, o_nnz));
8022       for (Ii = Istart; Ii < Iend; Ii++) {
8023         PetscInt dest_row = Ii / bs;
8024 
8025         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8026         for (jj = 0; jj < ncols; jj++) {
8027           PetscInt    dest_col = idx[jj] / bs;
8028           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8029 
8030           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8031         }
8032         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8033       }
8034       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8035       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8036     }
8037   } else {
8038     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8039     else {
8040       Gmat = Amat;
8041       PetscCall(PetscObjectReference((PetscObject)Gmat));
8042     }
8043     if (isseqaij) {
8044       a = Gmat;
8045       b = NULL;
8046     } else {
8047       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8048       a             = d->A;
8049       b             = d->B;
8050     }
8051     if (filter >= 0 || scale) {
8052       /* take absolute value of each entry */
8053       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8054         MatInfo      info;
8055         PetscScalar *avals;
8056 
8057         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8058         PetscCall(MatSeqAIJGetArray(c, &avals));
8059         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8060         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8061       }
8062     }
8063   }
8064   if (symmetrize) {
8065     PetscBool isset, issym;
8066 
8067     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8068     if (!isset || !issym) {
8069       Mat matTrans;
8070 
8071       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8072       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8073       PetscCall(MatDestroy(&matTrans));
8074     }
8075     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8076   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8077   if (scale) {
8078     /* symmetrically scale Gmat so that all diagonal values become 1 or -1 */
8079     Vec diag;
8080 
8081     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8082     PetscCall(MatGetDiagonal(Gmat, diag));
8083     PetscCall(VecReciprocal(diag));
8084     PetscCall(VecSqrtAbs(diag));
8085     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8086     PetscCall(VecDestroy(&diag));
8087   }
8088   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8089   if (filter >= 0) {
8090     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8091     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8092   }
8093   *a_Gmat = Gmat;
8094   PetscFunctionReturn(PETSC_SUCCESS);
8095 }
8096 
8097 /*
8098     Special version for direct calls from Fortran
8099 */
8100 
8101 /* Change these macros so they can be used in a void function */
8102 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8103 #undef PetscCall
8104 #define PetscCall(...) \
8105   do { \
8106     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8107     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8108       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8109       return; \
8110     } \
8111   } while (0)
8112 
8113 #undef SETERRQ
8114 #define SETERRQ(comm, ierr, ...) \
8115   do { \
8116     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8117     return; \
8118   } while (0)
8119 
8120 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8121   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8122 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8123   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8124 #else
8125 #endif
8126 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8127 {
8128   Mat         mat = *mmat;
8129   PetscInt    m = *mm, n = *mn;
8130   InsertMode  addv = *maddv;
8131   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8132   PetscScalar value;
8133 
8134   MatCheckPreallocated(mat, 1);
8135   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8136   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8137   {
8138     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8139     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8140     PetscBool roworiented = aij->roworiented;
8141 
8142     /* Some Variables required in the macro */
8143     Mat         A     = aij->A;
8144     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8145     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8146     MatScalar  *aa;
8147     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8148     Mat         B                 = aij->B;
8149     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8150     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8151     MatScalar  *ba;
8152     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8153      * cannot use "#if defined" inside a macro. */
8154     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8155 
8156     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8157     PetscInt   nonew = a->nonew;
8158     MatScalar *ap1, *ap2;
8159 
8160     PetscFunctionBegin;
8161     PetscCall(MatSeqAIJGetArray(A, &aa));
8162     PetscCall(MatSeqAIJGetArray(B, &ba));
8163     for (i = 0; i < m; i++) {
8164       if (im[i] < 0) continue;
8165       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8166       if (im[i] >= rstart && im[i] < rend) {
8167         row      = im[i] - rstart;
8168         lastcol1 = -1;
8169         rp1      = aj + ai[row];
8170         ap1      = aa + ai[row];
8171         rmax1    = aimax[row];
8172         nrow1    = ailen[row];
8173         low1     = 0;
8174         high1    = nrow1;
8175         lastcol2 = -1;
8176         rp2      = bj + bi[row];
8177         ap2      = ba + bi[row];
8178         rmax2    = bimax[row];
8179         nrow2    = bilen[row];
8180         low2     = 0;
8181         high2    = nrow2;
8182 
8183         for (j = 0; j < n; j++) {
8184           if (roworiented) value = v[i * n + j];
8185           else value = v[i + j * m];
8186           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8187           if (in[j] >= cstart && in[j] < cend) {
8188             col = in[j] - cstart;
8189             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8190           } else if (in[j] < 0) continue;
8191           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8192             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8193           } else {
8194             if (mat->was_assembled) {
8195               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8196 #if defined(PETSC_USE_CTABLE)
8197               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8198               col--;
8199 #else
8200               col = aij->colmap[in[j]] - 1;
8201 #endif
8202               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8203                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8204                 col = in[j];
8205                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8206                 B        = aij->B;
8207                 b        = (Mat_SeqAIJ *)B->data;
8208                 bimax    = b->imax;
8209                 bi       = b->i;
8210                 bilen    = b->ilen;
8211                 bj       = b->j;
8212                 rp2      = bj + bi[row];
8213                 ap2      = ba + bi[row];
8214                 rmax2    = bimax[row];
8215                 nrow2    = bilen[row];
8216                 low2     = 0;
8217                 high2    = nrow2;
8218                 bm       = aij->B->rmap->n;
8219                 ba       = b->a;
8220                 inserted = PETSC_FALSE;
8221               }
8222             } else col = in[j];
8223             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8224           }
8225         }
8226       } else if (!aij->donotstash) {
8227         if (roworiented) {
8228           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8229         } else {
8230           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8231         }
8232       }
8233     }
8234     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8235     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8236   }
8237   PetscFunctionReturnVoid();
8238 }
8239 
8240 /* Undefining these here since they were redefined from their original definition above! No
8241  * other PETSc functions should be defined past this point, as it is impossible to recover the
8242  * original definitions */
8243 #undef PetscCall
8244 #undef SETERRQ
8245