xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c0c276a7a9f347b22187dda26ae7d35d5b9ed8a2)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also
150   automatically switches over to use inodes when enough exist.
151 
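   Example Usage:
   A minimal sketch (not taken from this file; the sizes and preallocation counts are illustrative) that creates an `MATAIJ`
   matrix and calls both preallocation routines as recommended above; the routine that does not match the communicator size
   is simply ignored.
.vb
   Mat A;
   PetscCall(MatCreate(comm, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
   PetscCall(MatSetType(A, MATAIJ));
   PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
   PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
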
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
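   Example Usage:
   A minimal sketch (illustrative only): the type is usually selected at runtime from the options database with
   -mat_type aijcrl, or programmatically as below.
.vb
   Mat A;
   PetscCall(MatCreate(comm, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N));
   PetscCall(MatSetType(A, MATAIJCRL));
   PetscCall(MatSetFromOptions(A));
.ve
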
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380   number to the local number in the off-diagonal part of the local
381   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
382   a slightly higher hash-table lookup cost; without it, it is not scalable
383   (each process stores an order-N integer array) but is fast to access.
384 */
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
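
/*
   Usage sketch (illustration only; `gcol` stands for a hypothetical global column index): once the colmap has been
   built, a global column index is translated to a local column index of the off-diagonal block B, as done in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below. Entries are stored shifted by one so that 0 can mean
   "column not present on this rank".

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &col));
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif
     if (col < 0 || aij->garray[col] != gcol) {
       ... gcol does not occur in the off-diagonal block owned by this rank ...
     }
*/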
401 
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure LogFlops will slow dow the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
613                 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
615             }
616           } else col = in[j];
617           nonew = b->nonew;
618           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
619         }
620       }
621     } else {
622       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
623       if (!aij->donotstash) {
624         mat->assembled = PETSC_FALSE;
625         if (roworiented) {
626           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
627         } else {
628           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
629         }
630       }
631     }
632   }
633   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
634   PetscCall(MatSeqAIJRestoreArray(B, &ba));
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*
639     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
640     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
641     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
642 */
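/*
   Worked example (hypothetical numbers, for illustration only): with cstart = 3 and cend = 6, a local row whose
   global column indices in mat_j are {1, 4, 5, 9} is split as follows: columns 4 and 5 fall in [cstart, cend) and
   go into the diagonal block with local indices {1, 2} (global index minus cstart), so ailen[row] = 2; columns 1
   and 9 go into the off-diagonal block with their global indices kept, so bilen[row] = 2.
*/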
643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
644 {
645   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
646   Mat         A      = aij->A; /* diagonal part of the matrix */
647   Mat         B      = aij->B; /* off-diagonal part of the matrix */
648   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
649   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
650   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
651   PetscInt   *ailen = a->ilen, *aj = a->j;
652   PetscInt   *bilen = b->ilen, *bj = b->j;
653   PetscInt    am          = aij->A->rmap->n, j;
654   PetscInt    diag_so_far = 0, dnz;
655   PetscInt    offd_so_far = 0, onz;
656 
657   PetscFunctionBegin;
658   /* Iterate over all rows of the matrix */
659   for (j = 0; j < am; j++) {
660     dnz = onz = 0;
661     /*  Iterate over all non-zero columns of the current row */
662     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
663       /* If column is in the diagonal */
664       if (mat_j[col] >= cstart && mat_j[col] < cend) {
665         aj[diag_so_far++] = mat_j[col] - cstart;
666         dnz++;
667       } else { /* off-diagonal entries */
668         bj[offd_so_far++] = mat_j[col];
669         onz++;
670       }
671     }
672     ailen[j] = dnz;
673     bilen[j] = onz;
674   }
675   PetscFunctionReturn(PETSC_SUCCESS);
676 }
677 
678 /*
679     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
680     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
682     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
683     would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
684 */
685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
686 {
687   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
688   Mat          A    = aij->A; /* diagonal part of the matrix */
689   Mat          B    = aij->B; /* off-diagonal part of the matrix */
690   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
691   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
692   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
693   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
694   PetscInt    *ailen = a->ilen, *aj = a->j;
695   PetscInt    *bilen = b->ilen, *bj = b->j;
696   PetscInt     am          = aij->A->rmap->n, j;
697   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
698   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
699   PetscScalar *aa = a->a, *ba = b->a;
700 
701   PetscFunctionBegin;
702   /* Iterate over all rows of the matrix */
703   for (j = 0; j < am; j++) {
704     dnz_row = onz_row = 0;
705     rowstart_offd     = full_offd_i[j];
706     rowstart_diag     = full_diag_i[j];
707     /*  Iterate over all non-zero columns of the current row */
708     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
709       /* If column is in the diagonal */
710       if (mat_j[col] >= cstart && mat_j[col] < cend) {
711         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
712         aa[rowstart_diag + dnz_row] = mat_a[col];
713         dnz_row++;
714       } else { /* off-diagonal entries */
715         bj[rowstart_offd + onz_row] = mat_j[col];
716         ba[rowstart_offd + onz_row] = mat_a[col];
717         onz_row++;
718       }
719     }
720     ailen[j] = dnz_row;
721     bilen[j] = onz_row;
722   }
723   PetscFunctionReturn(PETSC_SUCCESS);
724 }
725 
726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
729   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
730   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
731 
732   PetscFunctionBegin;
733   for (i = 0; i < m; i++) {
734     if (idxm[i] < 0) continue; /* negative row */
735     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
736     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
737     row = idxm[i] - rstart;
738     for (j = 0; j < n; j++) {
739       if (idxn[j] < 0) continue; /* negative column */
740       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
741       if (idxn[j] >= cstart && idxn[j] < cend) {
742         col = idxn[j] - cstart;
743         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
744       } else {
745         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
746 #if defined(PETSC_USE_CTABLE)
747         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
748         col--;
749 #else
750         col = aij->colmap[idxn[j]] - 1;
751 #endif
752         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
753         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
754       }
755     }
756   }
757   PetscFunctionReturn(PETSC_SUCCESS);
758 }
759 
760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
761 {
762   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
763   PetscInt    nstash, reallocs;
764 
765   PetscFunctionBegin;
766   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
767 
768   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
769   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
770   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
771   PetscFunctionReturn(PETSC_SUCCESS);
772 }
773 
774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
775 {
776   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
777   PetscMPIInt  n;
778   PetscInt     i, j, rstart, ncols, flg;
779   PetscInt    *row, *col;
780   PetscBool    other_disassembled;
781   PetscScalar *val;
782 
783   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
784 
785   PetscFunctionBegin;
786   if (!aij->donotstash && !mat->nooffprocentries) {
787     while (1) {
788       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
789       if (!flg) break;
790 
791       for (i = 0; i < n;) {
792         /* Now identify the consecutive vals belonging to the same row */
793         for (j = i, rstart = row[j]; j < n; j++) {
794           if (row[j] != rstart) break;
795         }
796         if (j < n) ncols = j - i;
797         else ncols = n - i;
798         /* Now assemble all these values with a single function call */
799         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
800         i = j;
801       }
802     }
803     PetscCall(MatStashScatterEnd_Private(&mat->stash));
804   }
805 #if defined(PETSC_HAVE_DEVICE)
806   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
807   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
808   if (mat->boundtocpu) {
809     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
810     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
811   }
812 #endif
813   PetscCall(MatAssemblyBegin(aij->A, mode));
814   PetscCall(MatAssemblyEnd(aij->A, mode));
815 
816   /* determine if any processor has disassembled; if so we must
817      also disassemble ourselves, so that we may reassemble. */
818   /*
819      if the nonzero structure of submatrix B cannot change then we know that
820      no processor disassembled, thus we can skip this step
821   */
822   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
823     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
824     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
825       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
826     }
827   }
828   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
829   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
830 #if defined(PETSC_HAVE_DEVICE)
831   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
832 #endif
833   PetscCall(MatAssemblyBegin(aij->B, mode));
834   PetscCall(MatAssemblyEnd(aij->B, mode));
835 
836   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
837 
838   aij->rowvalues = NULL;
839 
840   PetscCall(VecDestroy(&aij->diag));
841 
842   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
844     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
845     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
846   }
847 #if defined(PETSC_HAVE_DEVICE)
848   mat->offloadmask = PETSC_OFFLOAD_BOTH;
849 #endif
850   PetscFunctionReturn(PETSC_SUCCESS);
851 }
852 
853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
854 {
855   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
856 
857   PetscFunctionBegin;
858   PetscCall(MatZeroEntries(l->A));
859   PetscCall(MatZeroEntries(l->B));
860   PetscFunctionReturn(PETSC_SUCCESS);
861 }
862 
863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
864 {
865   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
866   PetscInt   *lrows;
867   PetscInt    r, len;
868   PetscBool   cong;
869 
870   PetscFunctionBegin;
871   /* get locally owned rows */
872   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
873   PetscCall(MatHasCongruentLayouts(A, &cong));
874   /* fix right-hand side if needed */
875   if (x && b) {
876     const PetscScalar *xx;
877     PetscScalar       *bb;
878 
879     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
880     PetscCall(VecGetArrayRead(x, &xx));
881     PetscCall(VecGetArray(b, &bb));
882     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
883     PetscCall(VecRestoreArrayRead(x, &xx));
884     PetscCall(VecRestoreArray(b, &bb));
885   }
886 
887   if (diag != 0.0 && cong) {
888     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
889     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
890   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
891     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
892     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
893     PetscInt    nnwA, nnwB;
894     PetscBool   nnzA, nnzB;
895 
896     nnwA = aijA->nonew;
897     nnwB = aijB->nonew;
898     nnzA = aijA->keepnonzeropattern;
899     nnzB = aijB->keepnonzeropattern;
900     if (!nnzA) {
901       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
902       aijA->nonew = 0;
903     }
904     if (!nnzB) {
905       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
906       aijB->nonew = 0;
907     }
908     /* Must zero here before the next loop */
909     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
910     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
911     for (r = 0; r < len; ++r) {
912       const PetscInt row = lrows[r] + A->rmap->rstart;
913       if (row >= A->cmap->N) continue;
914       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
915     }
916     aijA->nonew = nnwA;
917     aijB->nonew = nnwB;
918   } else {
919     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
920     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
921   }
922   PetscCall(PetscFree(lrows));
923   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
924   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
928     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
929     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
930   }
931   PetscFunctionReturn(PETSC_SUCCESS);
932 }
933 
934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
935 {
936   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
937   PetscInt           n = A->rmap->n;
938   PetscInt           i, j, r, m, len = 0;
939   PetscInt          *lrows, *owners = A->rmap->range;
940   PetscMPIInt        p = 0;
941   PetscSFNode       *rrows;
942   PetscSF            sf;
943   const PetscScalar *xx;
944   PetscScalar       *bb, *mask, *aij_a;
945   Vec                xmask, lmask;
946   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
947   const PetscInt    *aj, *ii, *ridx;
948   PetscScalar       *aa;
949 
950   PetscFunctionBegin;
951   /* Create SF where leaves are input rows and roots are owned rows */
952   PetscCall(PetscMalloc1(n, &lrows));
953   for (r = 0; r < n; ++r) lrows[r] = -1;
954   PetscCall(PetscMalloc1(N, &rrows));
955   for (r = 0; r < N; ++r) {
956     const PetscInt idx = rows[r];
957     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
958     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
959       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
960     }
961     rrows[r].rank  = p;
962     rrows[r].index = rows[r] - owners[p];
963   }
964   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
965   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
966   /* Collect flags for rows to be zeroed */
967   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
969   PetscCall(PetscSFDestroy(&sf));
970   /* Compress and put in row numbers */
971   for (r = 0; r < n; ++r)
972     if (lrows[r] >= 0) lrows[len++] = r;
973   /* zero diagonal part of matrix */
974   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
975   /* handle off-diagonal part of matrix */
976   PetscCall(MatCreateVecs(A, &xmask, NULL));
977   PetscCall(VecDuplicate(l->lvec, &lmask));
978   PetscCall(VecGetArray(xmask, &bb));
979   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
980   PetscCall(VecRestoreArray(xmask, &bb));
981   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
983   PetscCall(VecDestroy(&xmask));
984   if (x && b) { /* this code is buggy when the row and column layout don't match */
985     PetscBool cong;
986 
987     PetscCall(MatHasCongruentLayouts(A, &cong));
988     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
989     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
991     PetscCall(VecGetArrayRead(l->lvec, &xx));
992     PetscCall(VecGetArray(b, &bb));
993   }
994   PetscCall(VecGetArray(lmask, &mask));
995   /* remove zeroed rows of off-diagonal matrix */
996   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
997   ii = aij->i;
998   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
999   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1000   if (aij->compressedrow.use) {
1001     m    = aij->compressedrow.nrows;
1002     ii   = aij->compressedrow.i;
1003     ridx = aij->compressedrow.rindex;
1004     for (i = 0; i < m; i++) {
1005       n  = ii[i + 1] - ii[i];
1006       aj = aij->j + ii[i];
1007       aa = aij_a + ii[i];
1008 
1009       for (j = 0; j < n; j++) {
1010         if (PetscAbsScalar(mask[*aj])) {
1011           if (b) bb[*ridx] -= *aa * xx[*aj];
1012           *aa = 0.0;
1013         }
1014         aa++;
1015         aj++;
1016       }
1017       ridx++;
1018     }
1019   } else { /* do not use compressed row format */
1020     m = l->B->rmap->n;
1021     for (i = 0; i < m; i++) {
1022       n  = ii[i + 1] - ii[i];
1023       aj = aij->j + ii[i];
1024       aa = aij_a + ii[i];
1025       for (j = 0; j < n; j++) {
1026         if (PetscAbsScalar(mask[*aj])) {
1027           if (b) bb[i] -= *aa * xx[*aj];
1028           *aa = 0.0;
1029         }
1030         aa++;
1031         aj++;
1032       }
1033     }
1034   }
1035   if (x && b) {
1036     PetscCall(VecRestoreArray(b, &bb));
1037     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1038   }
1039   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1040   PetscCall(VecRestoreArray(lmask, &mask));
1041   PetscCall(VecDestroy(&lmask));
1042   PetscCall(PetscFree(lrows));
1043 
1044   /* only change matrix nonzero state if pattern was allowed to be changed */
1045   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1046     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1047     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1048   }
1049   PetscFunctionReturn(PETSC_SUCCESS);
1050 }
1051 
1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1053 {
1054   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1055   PetscInt    nt;
1056   VecScatter  Mvctx = a->Mvctx;
1057 
1058   PetscFunctionBegin;
1059   PetscCall(VecGetLocalSize(xx, &nt));
1060   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1061   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1062   PetscUseTypeMethod(a->A, mult, xx, yy);
1063   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1064   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1065   PetscFunctionReturn(PETSC_SUCCESS);
1066 }
1067 
1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1069 {
1070   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1074   PetscFunctionReturn(PETSC_SUCCESS);
1075 }
1076 
1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1078 {
1079   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1080   VecScatter  Mvctx = a->Mvctx;
1081 
1082   PetscFunctionBegin;
1083   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1084   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1085   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1086   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1087   PetscFunctionReturn(PETSC_SUCCESS);
1088 }
1089 
1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1091 {
1092   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1093 
1094   PetscFunctionBegin;
1095   /* do nondiagonal part */
1096   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1097   /* do local part */
1098   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1099   /* add partial results together */
1100   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1102   PetscFunctionReturn(PETSC_SUCCESS);
1103 }
1104 
1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1106 {
1107   MPI_Comm    comm;
1108   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1109   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1110   IS          Me, Notme;
1111   PetscInt    M, N, first, last, *notme, i;
1112   PetscBool   lf;
1113   PetscMPIInt size;
1114 
1115   PetscFunctionBegin;
1116   /* Easy test: symmetric diagonal block */
1117   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1118   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1119   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1120   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1121   PetscCallMPI(MPI_Comm_size(comm, &size));
1122   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1123 
1124   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1125   PetscCall(MatGetSize(Amat, &M, &N));
1126   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1127   PetscCall(PetscMalloc1(N - last + first, &notme));
1128   for (i = 0; i < first; i++) notme[i] = i;
1129   for (i = last; i < M; i++) notme[i - last + first] = i;
1130   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1131   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1132   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1133   Aoff = Aoffs[0];
1134   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1135   Boff = Boffs[0];
1136   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1137   PetscCall(MatDestroyMatrices(1, &Aoffs));
1138   PetscCall(MatDestroyMatrices(1, &Boffs));
1139   PetscCall(ISDestroy(&Me));
1140   PetscCall(ISDestroy(&Notme));
1141   PetscCall(PetscFree(notme));
1142   PetscFunctionReturn(PETSC_SUCCESS);
1143 }
1144 
1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1146 {
1147   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1148 
1149   PetscFunctionBegin;
1150   /* do nondiagonal part */
1151   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1152   /* do local part */
1153   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1154   /* add partial results together */
1155   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1157   PetscFunctionReturn(PETSC_SUCCESS);
1158 }
1159 
1160 /*
1161   This only works correctly for square matrices where the subblock A->A is the
1162    diagonal block
1163 */
1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1165 {
1166   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1167 
1168   PetscFunctionBegin;
1169   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1170   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1171   PetscCall(MatGetDiagonal(a->A, v));
1172   PetscFunctionReturn(PETSC_SUCCESS);
1173 }
1174 
1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1176 {
1177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1178 
1179   PetscFunctionBegin;
1180   PetscCall(MatScale(a->A, aa));
1181   PetscCall(MatScale(a->B, aa));
1182   PetscFunctionReturn(PETSC_SUCCESS);
1183 }
1184 
1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1186 {
1187   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1188   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1189   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1190   const PetscInt    *garray = aij->garray;
1191   const PetscScalar *aa, *ba;
1192   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1193   PetscInt64         nz, hnz;
1194   PetscInt          *rowlens;
1195   PetscInt          *colidxs;
1196   PetscScalar       *matvals;
1197   PetscMPIInt        rank;
1198 
1199   PetscFunctionBegin;
1200   PetscCall(PetscViewerSetUp(viewer));
1201 
1202   M  = mat->rmap->N;
1203   N  = mat->cmap->N;
1204   m  = mat->rmap->n;
1205   rs = mat->rmap->rstart;
1206   cs = mat->cmap->rstart;
1207   nz = A->nz + B->nz;
1208 
1209   /* write matrix header */
1210   header[0] = MAT_FILE_CLASSID;
1211   header[1] = M;
1212   header[2] = N;
1213   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1214   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1215   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1216   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1217 
1218   /* fill in and store row lengths  */
1219   PetscCall(PetscMalloc1(m, &rowlens));
1220   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1221   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1222   PetscCall(PetscFree(rowlens));
1223 
1224   /* fill in and store column indices */
1225   PetscCall(PetscMalloc1(nz, &colidxs));
1226   for (cnt = 0, i = 0; i < m; i++) {
1227     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1228       if (garray[B->j[jb]] > cs) break;
1229       colidxs[cnt++] = garray[B->j[jb]];
1230     }
1231     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1232     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1233   }
1234   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1235   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1236   PetscCall(PetscFree(colidxs));
1237 
1238   /* fill in and store nonzero values */
1239   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1240   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1241   PetscCall(PetscMalloc1(nz, &matvals));
1242   for (cnt = 0, i = 0; i < m; i++) {
1243     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1244       if (garray[B->j[jb]] > cs) break;
1245       matvals[cnt++] = ba[jb];
1246     }
1247     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1248     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1249   }
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1251   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1254   PetscCall(PetscFree(matvals));
1255 
1256   /* write block size option to the viewer's .info file */
1257   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1258   PetscFunctionReturn(PETSC_SUCCESS);
1259 }
1260 
1261 #include <petscdraw.h>
1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1263 {
1264   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1265   PetscMPIInt       rank = aij->rank, size = aij->size;
1266   PetscBool         isdraw, iascii, isbinary;
1267   PetscViewer       sviewer;
1268   PetscViewerFormat format;
1269 
1270   PetscFunctionBegin;
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1273   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1274   if (iascii) {
1275     PetscCall(PetscViewerGetFormat(viewer, &format));
1276     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1277       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1278       PetscCall(PetscMalloc1(size, &nz));
1279       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1280       for (i = 0; i < size; i++) {
1281         nmax = PetscMax(nmax, nz[i]);
1282         nmin = PetscMin(nmin, nz[i]);
1283         navg += nz[i];
1284       }
1285       PetscCall(PetscFree(nz));
1286       navg = navg / size;
1287       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1288       PetscFunctionReturn(PETSC_SUCCESS);
1289     }
1290     PetscCall(PetscViewerGetFormat(viewer, &format));
1291     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1292       MatInfo   info;
1293       PetscInt *inodes = NULL;
1294 
1295       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1296       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1297       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1298       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1299       if (!inodes) {
1300         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1301                                                      info.memory));
1302       } else {
1303         PetscCall(
1304           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1305       }
1306       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1307       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1308       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1309       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1310       PetscCall(PetscViewerFlush(viewer));
1311       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1312       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1313       PetscCall(VecScatterView(aij->Mvctx, viewer));
1314       PetscFunctionReturn(PETSC_SUCCESS);
1315     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1316       PetscInt inodecount, inodelimit, *inodes;
1317       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1318       if (inodes) {
1319         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1320       } else {
1321         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1322       }
1323       PetscFunctionReturn(PETSC_SUCCESS);
1324     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1325       PetscFunctionReturn(PETSC_SUCCESS);
1326     }
1327   } else if (isbinary) {
1328     if (size == 1) {
1329       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1330       PetscCall(MatView(aij->A, viewer));
1331     } else {
1332       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1333     }
1334     PetscFunctionReturn(PETSC_SUCCESS);
1335   } else if (iascii && size == 1) {
1336     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1337     PetscCall(MatView(aij->A, viewer));
1338     PetscFunctionReturn(PETSC_SUCCESS);
1339   } else if (isdraw) {
1340     PetscDraw draw;
1341     PetscBool isnull;
1342     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1343     PetscCall(PetscDrawIsNull(draw, &isnull));
1344     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1345   }
1346 
1347   { /* assemble the entire matrix onto first processor */
1348     Mat A = NULL, Av;
1349     IS  isrow, iscol;
1350 
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1352     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1353     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1354     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1355     /* The following commented-out code uses MatCreateSubMatrices() instead */
1356     /*
1357     Mat *AA, A = NULL, Av;
1358     IS  isrow,iscol;
1359 
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1361     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1362     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1363     if (rank == 0) {
1364        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1365        A    = AA[0];
1366        Av   = AA[0];
1367     }
1368     PetscCall(MatDestroySubMatrices(1,&AA));
1369 */
1370     PetscCall(ISDestroy(&iscol));
1371     PetscCall(ISDestroy(&isrow));
1372     /*
1373        Every process has to participate in drawing the matrix since the graphics waits
1374        are synchronized across all processes that share the PetscDraw object
1375     */
1376     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1377     if (rank == 0) {
1378       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1379       PetscCall(MatView_SeqAIJ(Av, sviewer));
1380     }
1381     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1382     PetscCall(MatDestroy(&A));
1383   }
1384   PetscFunctionReturn(PETSC_SUCCESS);
1385 }
1386 
1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1388 {
1389   PetscBool iascii, isdraw, issocket, isbinary;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1395   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1396   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1397   PetscFunctionReturn(PETSC_SUCCESS);
1398 }
1399 
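/*
   A brief summary of the scheme below: across MPI ranks this is processor-block Jacobi, while the
   diagonal block of each rank is relaxed with SOR. Each outer iteration scatters the latest x into
   the ghost vector lvec, forms the updated right-hand side bb1 = bb - B*lvec, and then performs
   lits local SOR sweeps with the diagonal block A.
*/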
1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1401 {
1402   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1403   Vec         bb1 = NULL;
1404   PetscBool   hasop;
1405 
1406   PetscFunctionBegin;
1407   if (flag == SOR_APPLY_UPPER) {
1408     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1409     PetscFunctionReturn(PETSC_SUCCESS);
1410   }
1411 
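  /* the work vector bb1 is needed for multiple iterations, a nonzero initial guess, or the Eisenstat variant */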
1412   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1413 
1414   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1415     if (flag & SOR_ZERO_INITIAL_GUESS) {
1416       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1417       its--;
1418     }
1419 
1420     while (its--) {
1421       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1423 
1424       /* update rhs: bb1 = bb - B*x */
1425       PetscCall(VecScale(mat->lvec, -1.0));
1426       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1427 
1428       /* local sweep */
1429       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1430     }
1431   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1432     if (flag & SOR_ZERO_INITIAL_GUESS) {
1433       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1434       its--;
1435     }
1436     while (its--) {
1437       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1439 
1440       /* update rhs: bb1 = bb - B*x */
1441       PetscCall(VecScale(mat->lvec, -1.0));
1442       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1443 
1444       /* local sweep */
1445       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1446     }
1447   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1448     if (flag & SOR_ZERO_INITIAL_GUESS) {
1449       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1450       its--;
1451     }
1452     while (its--) {
1453       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1455 
1456       /* update rhs: bb1 = bb - B*x */
1457       PetscCall(VecScale(mat->lvec, -1.0));
1458       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1459 
1460       /* local sweep */
1461       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1462     }
1463   } else if (flag & SOR_EISENSTAT) {
1464     Vec xx1;
1465 
1466     PetscCall(VecDuplicate(bb, &xx1));
1467     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1468 
1469     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1471     if (!mat->diag) {
1472       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1473       PetscCall(MatGetDiagonal(matin, mat->diag));
1474     }
1475     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1476     if (hasop) {
1477       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1478     } else {
1479       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1480     }
1481     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1482 
1483     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1484 
1485     /* local sweep */
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1487     PetscCall(VecAXPY(xx, 1.0, xx1));
1488     PetscCall(VecDestroy(&xx1));
1489   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1490 
1491   PetscCall(VecDestroy(&bb1));
1492 
1493   matin->factorerrortype = mat->A->factorerrortype;
1494   PetscFunctionReturn(PETSC_SUCCESS);
1495 }
1496 
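/*
   Outline of MatPermute_MPIAIJ() below: the row and column permutations are inverted with PetscSF
   reductions so each rank learns the destination of its locally owned rows and columns, the new
   diagonal/off-diagonal nonzero counts per destination row are gathered for preallocation, and the
   values are finally inserted with MatSetValues() in batches of at most m columns (the scratch
   arrays only hold m entries).
*/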
1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1498 {
1499   Mat             aA, aB, Aperm;
1500   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1501   PetscScalar    *aa, *ba;
1502   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1503   PetscSF         rowsf, sf;
1504   IS              parcolp = NULL;
1505   PetscBool       done;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(MatGetLocalSize(A, &m, &n));
1509   PetscCall(ISGetIndices(rowp, &rwant));
1510   PetscCall(ISGetIndices(colp, &cwant));
1511   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1512 
1513   /* Invert row permutation to find out where my rows should go */
1514   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1515   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1516   PetscCall(PetscSFSetFromOptions(rowsf));
1517   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1518   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1520 
1521   /* Invert column permutation to find out where my columns should go */
1522   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1523   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1524   PetscCall(PetscSFSetFromOptions(sf));
1525   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1526   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1528   PetscCall(PetscSFDestroy(&sf));
1529 
1530   PetscCall(ISRestoreIndices(rowp, &rwant));
1531   PetscCall(ISRestoreIndices(colp, &cwant));
1532   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1533 
1534   /* Find out where my gcols should go */
1535   PetscCall(MatGetSize(aB, NULL, &ng));
1536   PetscCall(PetscMalloc1(ng, &gcdest));
1537   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1538   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1539   PetscCall(PetscSFSetFromOptions(sf));
1540   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1542   PetscCall(PetscSFDestroy(&sf));
1543 
1544   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1545   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1546   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1547   for (i = 0; i < m; i++) {
1548     PetscInt    row = rdest[i];
1549     PetscMPIInt rowner;
1550     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1551     for (j = ai[i]; j < ai[i + 1]; j++) {
1552       PetscInt    col = cdest[aj[j]];
1553       PetscMPIInt cowner;
1554       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1555       if (rowner == cowner) dnnz[i]++;
1556       else onnz[i]++;
1557     }
1558     for (j = bi[i]; j < bi[i + 1]; j++) {
1559       PetscInt    col = gcdest[bj[j]];
1560       PetscMPIInt cowner;
1561       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1562       if (rowner == cowner) dnnz[i]++;
1563       else onnz[i]++;
1564     }
1565   }
1566   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1570   PetscCall(PetscSFDestroy(&rowsf));
1571 
1572   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1573   PetscCall(MatSeqAIJGetArray(aA, &aa));
1574   PetscCall(MatSeqAIJGetArray(aB, &ba));
1575   for (i = 0; i < m; i++) {
1576     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1577     PetscInt  j0, rowlen;
1578     rowlen = ai[i + 1] - ai[i];
1579     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than the number of rows m (the size of the scratch arrays), so insert in batches */
1580       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1581       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1582     }
1583     rowlen = bi[i + 1] - bi[i];
1584     for (j0 = j = 0; j < rowlen; j0 = j) {
1585       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1586       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1587     }
1588   }
1589   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1591   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1592   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1593   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1594   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1595   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1596   PetscCall(PetscFree3(work, rdest, cdest));
1597   PetscCall(PetscFree(gcdest));
1598   if (parcolp) PetscCall(ISDestroy(&colp));
1599   *B = Aperm;
1600   PetscFunctionReturn(PETSC_SUCCESS);
1601 }
1602 
1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1604 {
1605   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1606 
1607   PetscFunctionBegin;
1608   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1609   if (ghosts) *ghosts = aij->garray;
1610   PetscFunctionReturn(PETSC_SUCCESS);
1611 }
1612 
1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1614 {
1615   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1616   Mat            A = mat->A, B = mat->B;
1617   PetscLogDouble isend[5], irecv[5];
1618 
1619   PetscFunctionBegin;
1620   info->block_size = 1.0;
1621   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1622 
1623   isend[0] = info->nz_used;
1624   isend[1] = info->nz_allocated;
1625   isend[2] = info->nz_unneeded;
1626   isend[3] = info->memory;
1627   isend[4] = info->mallocs;
1628 
1629   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1630 
1631   isend[0] += info->nz_used;
1632   isend[1] += info->nz_allocated;
1633   isend[2] += info->nz_unneeded;
1634   isend[3] += info->memory;
1635   isend[4] += info->mallocs;
1636   if (flag == MAT_LOCAL) {
1637     info->nz_used      = isend[0];
1638     info->nz_allocated = isend[1];
1639     info->nz_unneeded  = isend[2];
1640     info->memory       = isend[3];
1641     info->mallocs      = isend[4];
1642   } else if (flag == MAT_GLOBAL_MAX) {
1643     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1644 
1645     info->nz_used      = irecv[0];
1646     info->nz_allocated = irecv[1];
1647     info->nz_unneeded  = irecv[2];
1648     info->memory       = irecv[3];
1649     info->mallocs      = irecv[4];
1650   } else if (flag == MAT_GLOBAL_SUM) {
1651     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1652 
1653     info->nz_used      = irecv[0];
1654     info->nz_allocated = irecv[1];
1655     info->nz_unneeded  = irecv[2];
1656     info->memory       = irecv[3];
1657     info->mallocs      = irecv[4];
1658   }
1659   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1660   info->fill_ratio_needed = 0;
1661   info->factor_mallocs    = 0;
1662   PetscFunctionReturn(PETSC_SUCCESS);
1663 }
1664 
1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1666 {
1667   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1668 
1669   PetscFunctionBegin;
1670   switch (op) {
1671   case MAT_NEW_NONZERO_LOCATIONS:
1672   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1673   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1674   case MAT_KEEP_NONZERO_PATTERN:
1675   case MAT_NEW_NONZERO_LOCATION_ERR:
1676   case MAT_USE_INODES:
1677   case MAT_IGNORE_ZERO_ENTRIES:
1678   case MAT_FORM_EXPLICIT_TRANSPOSE:
1679     MatCheckPreallocated(A, 1);
1680     PetscCall(MatSetOption(a->A, op, flg));
1681     PetscCall(MatSetOption(a->B, op, flg));
1682     break;
1683   case MAT_ROW_ORIENTED:
1684     MatCheckPreallocated(A, 1);
1685     a->roworiented = flg;
1686 
1687     PetscCall(MatSetOption(a->A, op, flg));
1688     PetscCall(MatSetOption(a->B, op, flg));
1689     break;
1690   case MAT_IGNORE_OFF_PROC_ENTRIES:
1691     a->donotstash = flg;
1692     break;
1693   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1694   case MAT_SPD:
1695   case MAT_SYMMETRIC:
1696   case MAT_STRUCTURALLY_SYMMETRIC:
1697   case MAT_HERMITIAN:
1698   case MAT_SYMMETRY_ETERNAL:
1699   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1700   case MAT_SPD_ETERNAL:
1701     /* if the diagonal block is square it inherits some of the properties above */
1702     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1703     break;
1704   case MAT_SUBMAT_SINGLEIS:
1705     A->submat_singleis = flg;
1706     break;
1707   default:
1708     break;
1709   }
1710   PetscFunctionReturn(PETSC_SUCCESS);
1711 }
1712 
1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1714 {
1715   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1716   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1717   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1718   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1719   PetscInt    *cmap, *idx_p;
1720 
1721   PetscFunctionBegin;
1722   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1723   mat->getrowactive = PETSC_TRUE;
1724 
1725   if (!mat->rowvalues && (idx || v)) {
1726     /*
1727         allocate enough space to hold information from the longest row.
1728     */
1729     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1730     PetscInt    max = 1, tmp;
1731     for (i = 0; i < matin->rmap->n; i++) {
1732       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1733       if (max < tmp) max = tmp;
1734     }
1735     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1736   }
1737 
1738   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1739   lrow = row - rstart;
1740 
1741   pvA = &vworkA;
1742   pcA = &cworkA;
1743   pvB = &vworkB;
1744   pcB = &cworkB;
1745   if (!v) {
1746     pvA = NULL;
1747     pvB = NULL;
1748   }
1749   if (!idx) {
1750     pcA = NULL;
1751     if (!v) pcB = NULL;
1752   }
1753   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1754   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1755   nztot = nzA + nzB;
1756 
1757   cmap = mat->garray;
1758   if (v || idx) {
1759     if (nztot) {
1760       /* Sort by increasing column numbers, assuming A and B already sorted */
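      /* Illustrative example (hypothetical values): with cstart = 10 and garray = {3, 7, 15}, the
         returned row is the B entries with global column < 10 (columns 3 and 7), then the A entries
         shifted by cstart, then the remaining B entries (column 15) */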
1761       PetscInt imark = -1;
1762       if (v) {
1763         *v = v_p = mat->rowvalues;
1764         for (i = 0; i < nzB; i++) {
1765           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1766           else break;
1767         }
1768         imark = i;
1769         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1770         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1771       }
1772       if (idx) {
1773         *idx = idx_p = mat->rowindices;
1774         if (imark > -1) {
1775           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1776         } else {
1777           for (i = 0; i < nzB; i++) {
1778             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1779             else break;
1780           }
1781           imark = i;
1782         }
1783         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1784         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1785       }
1786     } else {
1787       if (idx) *idx = NULL;
1788       if (v) *v = NULL;
1789     }
1790   }
1791   *nz = nztot;
1792   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1793   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1794   PetscFunctionReturn(PETSC_SUCCESS);
1795 }
1796 
1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1798 {
1799   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1800 
1801   PetscFunctionBegin;
1802   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1803   aij->getrowactive = PETSC_FALSE;
1804   PetscFunctionReturn(PETSC_SUCCESS);
1805 }
1806 
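/*
   The norms computed below follow the standard definitions:
     NORM_FROBENIUS: ||A||_F   = sqrt(sum_ij |a_ij|^2)
     NORM_1:         ||A||_1   = max_j sum_i |a_ij|   (largest column sum)
     NORM_INFINITY:  ||A||_inf = max_i sum_j |a_ij|   (largest row sum)
   The contributions of the local diagonal (A) and off-diagonal (B) parts are accumulated and then
   combined across ranks with an MPI reduction.
*/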
1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1808 {
1809   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1810   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1811   PetscInt         i, j, cstart = mat->cmap->rstart;
1812   PetscReal        sum = 0.0;
1813   const MatScalar *v, *amata, *bmata;
1814 
1815   PetscFunctionBegin;
1816   if (aij->size == 1) {
1817     PetscCall(MatNorm(aij->A, type, norm));
1818   } else {
1819     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1820     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1821     if (type == NORM_FROBENIUS) {
1822       v = amata;
1823       for (i = 0; i < amat->nz; i++) {
1824         sum += PetscRealPart(PetscConj(*v) * (*v));
1825         v++;
1826       }
1827       v = bmata;
1828       for (i = 0; i < bmat->nz; i++) {
1829         sum += PetscRealPart(PetscConj(*v) * (*v));
1830         v++;
1831       }
1832       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1833       *norm = PetscSqrtReal(*norm);
1834       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1835     } else if (type == NORM_1) { /* max column norm */
1836       PetscReal *tmp;
1837       PetscInt  *jj, *garray = aij->garray;
1838       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1839       *norm = 0.0;
1840       v     = amata;
1841       jj    = amat->j;
1842       for (j = 0; j < amat->nz; j++) {
1843         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1844         v++;
1845       }
1846       v  = bmata;
1847       jj = bmat->j;
1848       for (j = 0; j < bmat->nz; j++) {
1849         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1850         v++;
1851       }
1852       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1853       for (j = 0; j < mat->cmap->N; j++) {
1854         if (tmp[j] > *norm) *norm = tmp[j];
1855       }
1856       PetscCall(PetscFree(tmp));
1857       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1858     } else if (type == NORM_INFINITY) { /* max row norm */
1859       PetscReal ntemp = 0.0;
1860       for (j = 0; j < aij->A->rmap->n; j++) {
1861         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1862         sum = 0.0;
1863         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1864           sum += PetscAbsScalar(*v);
1865           v++;
1866         }
1867         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1868         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1869           sum += PetscAbsScalar(*v);
1870           v++;
1871         }
1872         if (sum > ntemp) ntemp = sum;
1873       }
1874       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1875       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1876     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1878     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1879   }
1880   PetscFunctionReturn(PETSC_SUCCESS);
1881 }
1882 
1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1884 {
1885   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1886   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1887   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1888   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1889   Mat              B, A_diag, *B_diag;
1890   const MatScalar *pbv, *bv;
1891 
1892   PetscFunctionBegin;
1893   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1894   ma = A->rmap->n;
1895   na = A->cmap->n;
1896   mb = a->B->rmap->n;
1897   nb = a->B->cmap->n;
1898   ai = Aloc->i;
1899   aj = Aloc->j;
1900   bi = Bloc->i;
1901   bj = Bloc->j;
1902   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1903     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1904     PetscSFNode         *oloc;
1905     PETSC_UNUSED PetscSF sf;
1906 
1907     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1908     /* compute d_nnz for preallocation */
1909     PetscCall(PetscArrayzero(d_nnz, na));
1910     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1911     /* compute local off-diagonal contributions */
1912     PetscCall(PetscArrayzero(g_nnz, nb));
1913     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1914     /* map those to global */
1915     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1916     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1917     PetscCall(PetscSFSetFromOptions(sf));
1918     PetscCall(PetscArrayzero(o_nnz, na));
1919     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1921     PetscCall(PetscSFDestroy(&sf));
1922 
1923     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1924     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1925     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1926     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1927     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1928     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1929   } else {
1930     B = *matout;
1931     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1932   }
1933 
1934   b           = (Mat_MPIAIJ *)B->data;
1935   A_diag      = a->A;
1936   B_diag      = &b->A;
1937   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1938   A_diag_ncol = A_diag->cmap->N;
1939   B_diag_ilen = sub_B_diag->ilen;
1940   B_diag_i    = sub_B_diag->i;
1941 
1942   /* Set ilen for diagonal of B */
1943   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1944 
1945   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1946   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1947   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1948   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1949 
1950   /* copy over the B part */
1951   PetscCall(PetscMalloc1(bi[mb], &cols));
1952   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1953   pbv = bv;
1954   row = A->rmap->rstart;
1955   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1956   cols_tmp = cols;
1957   for (i = 0; i < mb; i++) {
1958     ncol = bi[i + 1] - bi[i];
1959     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1960     row++;
1961     if (pbv) pbv += ncol;
1962     if (cols_tmp) cols_tmp += ncol;
1963   }
1964   PetscCall(PetscFree(cols));
1965   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1966 
1967   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1968   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1969   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1970     *matout = B;
1971   } else {
1972     PetscCall(MatHeaderMerge(A, &B));
1973   }
1974   PetscFunctionReturn(PETSC_SUCCESS);
1975 }
1976 
1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1978 {
1979   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1980   Mat         a = aij->A, b = aij->B;
1981   PetscInt    s1, s2, s3;
1982 
1983   PetscFunctionBegin;
1984   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1985   if (rr) {
1986     PetscCall(VecGetLocalSize(rr, &s1));
1987     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1988     /* Overlap communication with computation. */
1989     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1990   }
1991   if (ll) {
1992     PetscCall(VecGetLocalSize(ll, &s1));
1993     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1994     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1995   }
1996   /* scale the diagonal block */
1997   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1998 
1999   if (rr) {
2000     /* Do a scatter end and then right scale the off-diagonal block */
2001     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2002     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2003   }
2004   PetscFunctionReturn(PETSC_SUCCESS);
2005 }
2006 
2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2008 {
2009   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2010 
2011   PetscFunctionBegin;
2012   PetscCall(MatSetUnfactored(a->A));
2013   PetscFunctionReturn(PETSC_SUCCESS);
2014 }
2015 
2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2017 {
2018   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2019   Mat         a, b, c, d;
2020   PetscBool   flg;
2021 
2022   PetscFunctionBegin;
2023   a = matA->A;
2024   b = matA->B;
2025   c = matB->A;
2026   d = matB->B;
2027 
2028   PetscCall(MatEqual(a, c, &flg));
2029   if (flg) PetscCall(MatEqual(b, d, &flg));
2030   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
2033 
2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2038 
2039   PetscFunctionBegin;
2040   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2041   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2042     /* because of the column compression in the off-processor part of the matrix a->B,
2043        the number of columns in a->B and b->B may be different, hence we cannot call
2044        MatCopy() directly on the two parts. If need be, we could provide a more
2045        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
2046        and then copying the submatrices */
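    /* For example (illustrative values only), a->B may have garray = {2, 9} while b->B has
       garray = {2, 5, 9}; the compressed local column indices then refer to different global
       columns, so copying the B parts entry by entry would be wrong */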
2047     PetscCall(MatCopy_Basic(A, B, str));
2048   } else {
2049     PetscCall(MatCopy(a->A, b->A, str));
2050     PetscCall(MatCopy(a->B, b->B, str));
2051   }
2052   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2053   PetscFunctionReturn(PETSC_SUCCESS);
2054 }
2055 
2056 /*
2057    Computes the number of nonzeros per row needed for preallocation when X and Y
2058    have different nonzero structure.
2059 */
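/*
   A worked example (hypothetical values): if a row of X has global columns {1, 4, 7} after mapping
   through xltog, and the same row of Y has {0, 4, 9} after mapping through yltog, the union is
   {0, 1, 4, 7, 9}, so nnz for that row is 5.
*/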
2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2061 {
2062   PetscInt i, j, k, nzx, nzy;
2063 
2064   PetscFunctionBegin;
2065   /* Set the number of nonzeros in the new matrix */
2066   for (i = 0; i < m; i++) {
2067     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2068     nzx    = xi[i + 1] - xi[i];
2069     nzy    = yi[i + 1] - yi[i];
2070     nnz[i] = 0;
2071     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2072       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2073       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2074       nnz[i]++;
2075     }
2076     for (; k < nzy; k++) nnz[i]++;
2077   }
2078   PetscFunctionReturn(PETSC_SUCCESS);
2079 }
2080 
2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2083 {
2084   PetscInt    m = Y->rmap->N;
2085   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2086   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2090   PetscFunctionReturn(PETSC_SUCCESS);
2091 }
2092 
2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2094 {
2095   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2096 
2097   PetscFunctionBegin;
2098   if (str == SAME_NONZERO_PATTERN) {
2099     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2100     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2101   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2102     PetscCall(MatAXPY_Basic(Y, a, X, str));
2103   } else {
2104     Mat       B;
2105     PetscInt *nnz_d, *nnz_o;
2106 
2107     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2108     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2109     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2110     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2111     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2112     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2113     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2114     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2115     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2116     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2117     PetscCall(MatHeaderMerge(Y, &B));
2118     PetscCall(PetscFree(nnz_d));
2119     PetscCall(PetscFree(nnz_o));
2120   }
2121   PetscFunctionReturn(PETSC_SUCCESS);
2122 }
2123 
2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2125 
2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2127 {
2128   PetscFunctionBegin;
2129   if (PetscDefined(USE_COMPLEX)) {
2130     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2131 
2132     PetscCall(MatConjugate_SeqAIJ(aij->A));
2133     PetscCall(MatConjugate_SeqAIJ(aij->B));
2134   }
2135   PetscFunctionReturn(PETSC_SUCCESS);
2136 }
2137 
2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2139 {
2140   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2141 
2142   PetscFunctionBegin;
2143   PetscCall(MatRealPart(a->A));
2144   PetscCall(MatRealPart(a->B));
2145   PetscFunctionReturn(PETSC_SUCCESS);
2146 }
2147 
2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2149 {
2150   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2151 
2152   PetscFunctionBegin;
2153   PetscCall(MatImaginaryPart(a->A));
2154   PetscCall(MatImaginaryPart(a->B));
2155   PetscFunctionReturn(PETSC_SUCCESS);
2156 }
2157 
2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2159 {
2160   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2161   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2162   PetscScalar       *vv;
2163   Vec                vB, vA;
2164   const PetscScalar *va, *vb;
2165 
2166   PetscFunctionBegin;
2167   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2168   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2169 
2170   PetscCall(VecGetArrayRead(vA, &va));
2171   if (idx) {
2172     for (i = 0; i < m; i++) {
2173       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2174     }
2175   }
2176 
2177   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2178   PetscCall(PetscMalloc1(m, &idxb));
2179   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2180 
2181   PetscCall(VecGetArrayWrite(v, &vv));
2182   PetscCall(VecGetArrayRead(vB, &vb));
2183   for (i = 0; i < m; i++) {
2184     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2185       vv[i] = vb[i];
2186       if (idx) idx[i] = a->garray[idxb[i]];
2187     } else {
2188       vv[i] = va[i];
2189       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2190     }
2191   }
2192   PetscCall(VecRestoreArrayWrite(v, &vv));
2193   PetscCall(VecRestoreArrayRead(vA, &va));
2194   PetscCall(VecRestoreArrayRead(vB, &vb));
2195   PetscCall(PetscFree(idxb));
2196   PetscCall(VecDestroy(&vA));
2197   PetscCall(VecDestroy(&vB));
2198   PetscFunctionReturn(PETSC_SUCCESS);
2199 }
2200 
2201 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2202 {
2203   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2204   Vec         vB, vA;
2205 
2206   PetscFunctionBegin;
2207   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2208   PetscCall(MatGetRowSumAbs(a->A, vA));
2209   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2210   PetscCall(MatGetRowSumAbs(a->B, vB));
2211   PetscCall(VecAXPY(vA, 1.0, vB));
2212   PetscCall(VecDestroy(&vB));
2213   PetscCall(VecCopy(vA, v));
2214   PetscCall(VecDestroy(&vA));
2215   PetscFunctionReturn(PETSC_SUCCESS);
2216 }
2217 
2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2219 {
2220   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2221   PetscInt           m = A->rmap->n, n = A->cmap->n;
2222   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2223   PetscInt          *cmap = mat->garray;
2224   PetscInt          *diagIdx, *offdiagIdx;
2225   Vec                diagV, offdiagV;
2226   PetscScalar       *a, *diagA, *offdiagA;
2227   const PetscScalar *ba, *bav;
2228   PetscInt           r, j, col, ncols, *bi, *bj;
2229   Mat                B = mat->B;
2230   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2231 
2232   PetscFunctionBegin;
2233   /* When one process holds the entire matrix and the other processes have no entries */
2234   if (A->cmap->N == n) {
2235     PetscCall(VecGetArrayWrite(v, &diagA));
2236     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2237     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2238     PetscCall(VecDestroy(&diagV));
2239     PetscCall(VecRestoreArrayWrite(v, &diagA));
2240     PetscFunctionReturn(PETSC_SUCCESS);
2241   } else if (n == 0) {
2242     if (m) {
2243       PetscCall(VecGetArrayWrite(v, &a));
2244       for (r = 0; r < m; r++) {
2245         a[r] = 0.0;
2246         if (idx) idx[r] = -1;
2247       }
2248       PetscCall(VecRestoreArrayWrite(v, &a));
2249     }
2250     PetscFunctionReturn(PETSC_SUCCESS);
2251   }
2252 
2253   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2255   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2256   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2257 
2258   /* Get offdiagIdx[] for implicit 0.0 */
2259   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2260   ba = bav;
2261   bi = b->i;
2262   bj = b->j;
2263   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2264   for (r = 0; r < m; r++) {
2265     ncols = bi[r + 1] - bi[r];
2266     if (ncols == A->cmap->N - n) { /* Brow is dense */
2267       offdiagA[r]   = *ba;
2268       offdiagIdx[r] = cmap[0];
2269     } else { /* Brow is sparse so we already KNOW the minimum absolute value is 0.0 */
2270       offdiagA[r] = 0.0;
2271 
2272       /* Find first hole in the cmap */
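      /* Illustrative example (hypothetical values): with cstart = 3, n = 2, and a row whose
         off-diagonal global columns are {0, 2, 5}, the j-th possible off-diagonal column is j for
         j < cstart and j + n otherwise; at j = 1 the actual column 2 already exceeds the possible
         column 1, so the first implicit zero is at global column 1 */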
2273       for (j = 0; j < ncols; j++) {
2274         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2275         if (col > j && j < cstart) {
2276           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2277           break;
2278         } else if (col > j + n && j >= cstart) {
2279           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2280           break;
2281         }
2282       }
2283       if (j == ncols && ncols < A->cmap->N - n) {
2284         /* a hole is outside compressed Bcols */
2285         if (ncols == 0) {
2286           if (cstart) {
2287             offdiagIdx[r] = 0;
2288           } else offdiagIdx[r] = cend;
2289         } else { /* ncols > 0 */
2290           offdiagIdx[r] = cmap[ncols - 1] + 1;
2291           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2292         }
2293       }
2294     }
2295 
2296     for (j = 0; j < ncols; j++) {
2297       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2298         offdiagA[r]   = *ba;
2299         offdiagIdx[r] = cmap[*bj];
2300       }
2301       ba++;
2302       bj++;
2303     }
2304   }
2305 
2306   PetscCall(VecGetArrayWrite(v, &a));
2307   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2308   for (r = 0; r < m; ++r) {
2309     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2310       a[r] = diagA[r];
2311       if (idx) idx[r] = cstart + diagIdx[r];
2312     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2313       a[r] = diagA[r];
2314       if (idx) {
2315         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2316           idx[r] = cstart + diagIdx[r];
2317         } else idx[r] = offdiagIdx[r];
2318       }
2319     } else {
2320       a[r] = offdiagA[r];
2321       if (idx) idx[r] = offdiagIdx[r];
2322     }
2323   }
2324   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2325   PetscCall(VecRestoreArrayWrite(v, &a));
2326   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2327   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2328   PetscCall(VecDestroy(&diagV));
2329   PetscCall(VecDestroy(&offdiagV));
2330   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2331   PetscFunctionReturn(PETSC_SUCCESS);
2332 }
2333 
2334 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2335 {
2336   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2337   PetscInt           m = A->rmap->n, n = A->cmap->n;
2338   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2339   PetscInt          *cmap = mat->garray;
2340   PetscInt          *diagIdx, *offdiagIdx;
2341   Vec                diagV, offdiagV;
2342   PetscScalar       *a, *diagA, *offdiagA;
2343   const PetscScalar *ba, *bav;
2344   PetscInt           r, j, col, ncols, *bi, *bj;
2345   Mat                B = mat->B;
2346   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2347 
2348   PetscFunctionBegin;
2349   /* When one process holds the entire matrix and the other processes have no entries */
2350   if (A->cmap->N == n) {
2351     PetscCall(VecGetArrayWrite(v, &diagA));
2352     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2353     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2354     PetscCall(VecDestroy(&diagV));
2355     PetscCall(VecRestoreArrayWrite(v, &diagA));
2356     PetscFunctionReturn(PETSC_SUCCESS);
2357   } else if (n == 0) {
2358     if (m) {
2359       PetscCall(VecGetArrayWrite(v, &a));
2360       for (r = 0; r < m; r++) {
2361         a[r] = PETSC_MAX_REAL;
2362         if (idx) idx[r] = -1;
2363       }
2364       PetscCall(VecRestoreArrayWrite(v, &a));
2365     }
2366     PetscFunctionReturn(PETSC_SUCCESS);
2367   }
2368 
2369   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2371   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2372   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2373 
2374   /* Get offdiagIdx[] for implicit 0.0 */
2375   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2376   ba = bav;
2377   bi = b->i;
2378   bj = b->j;
2379   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2380   for (r = 0; r < m; r++) {
2381     ncols = bi[r + 1] - bi[r];
2382     if (ncols == A->cmap->N - n) { /* Brow is dense */
2383       offdiagA[r]   = *ba;
2384       offdiagIdx[r] = cmap[0];
2385     } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower */
2386       offdiagA[r] = 0.0;
2387 
2388       /* Find first hole in the cmap */
2389       for (j = 0; j < ncols; j++) {
2390         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2391         if (col > j && j < cstart) {
2392           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2393           break;
2394         } else if (col > j + n && j >= cstart) {
2395           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2396           break;
2397         }
2398       }
2399       if (j == ncols && ncols < A->cmap->N - n) {
2400         /* a hole is outside compressed Bcols */
2401         if (ncols == 0) {
2402           if (cstart) {
2403             offdiagIdx[r] = 0;
2404           } else offdiagIdx[r] = cend;
2405         } else { /* ncols > 0 */
2406           offdiagIdx[r] = cmap[ncols - 1] + 1;
2407           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2408         }
2409       }
2410     }
2411 
2412     for (j = 0; j < ncols; j++) {
2413       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2414         offdiagA[r]   = *ba;
2415         offdiagIdx[r] = cmap[*bj];
2416       }
2417       ba++;
2418       bj++;
2419     }
2420   }
2421 
2422   PetscCall(VecGetArrayWrite(v, &a));
2423   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2424   for (r = 0; r < m; ++r) {
2425     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2426       a[r] = diagA[r];
2427       if (idx) idx[r] = cstart + diagIdx[r];
2428     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2429       a[r] = diagA[r];
2430       if (idx) {
2431         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2432           idx[r] = cstart + diagIdx[r];
2433         } else idx[r] = offdiagIdx[r];
2434       }
2435     } else {
2436       a[r] = offdiagA[r];
2437       if (idx) idx[r] = offdiagIdx[r];
2438     }
2439   }
2440   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2441   PetscCall(VecRestoreArrayWrite(v, &a));
2442   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2443   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2444   PetscCall(VecDestroy(&diagV));
2445   PetscCall(VecDestroy(&offdiagV));
2446   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2447   PetscFunctionReturn(PETSC_SUCCESS);
2448 }
2449 
2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2451 {
2452   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2453   PetscInt           m = A->rmap->n, n = A->cmap->n;
2454   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2455   PetscInt          *cmap = mat->garray;
2456   PetscInt          *diagIdx, *offdiagIdx;
2457   Vec                diagV, offdiagV;
2458   PetscScalar       *a, *diagA, *offdiagA;
2459   const PetscScalar *ba, *bav;
2460   PetscInt           r, j, col, ncols, *bi, *bj;
2461   Mat                B = mat->B;
2462   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2463 
2464   PetscFunctionBegin;
2465   /* When one process holds the entire matrix and the other processes have no entries */
2466   if (A->cmap->N == n) {
2467     PetscCall(VecGetArrayWrite(v, &diagA));
2468     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2469     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2470     PetscCall(VecDestroy(&diagV));
2471     PetscCall(VecRestoreArrayWrite(v, &diagA));
2472     PetscFunctionReturn(PETSC_SUCCESS);
2473   } else if (n == 0) {
2474     if (m) {
2475       PetscCall(VecGetArrayWrite(v, &a));
2476       for (r = 0; r < m; r++) {
2477         a[r] = PETSC_MIN_REAL;
2478         if (idx) idx[r] = -1;
2479       }
2480       PetscCall(VecRestoreArrayWrite(v, &a));
2481     }
2482     PetscFunctionReturn(PETSC_SUCCESS);
2483   }
2484 
2485   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2487   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2488   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2489 
2490   /* Get offdiagIdx[] for implicit 0.0 */
2491   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2492   ba = bav;
2493   bi = b->i;
2494   bj = b->j;
2495   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2496   for (r = 0; r < m; r++) {
2497     ncols = bi[r + 1] - bi[r];
2498     if (ncols == A->cmap->N - n) { /* Brow is dense */
2499       offdiagA[r]   = *ba;
2500       offdiagIdx[r] = cmap[0];
2501     } else { /* Brow is sparse so we already KNOW the maximum is 0.0 or higher */
2502       offdiagA[r] = 0.0;
2503 
2504       /* Find first hole in the cmap */
2505       for (j = 0; j < ncols; j++) {
2506         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2507         if (col > j && j < cstart) {
2508           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2509           break;
2510         } else if (col > j + n && j >= cstart) {
2511           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2512           break;
2513         }
2514       }
2515       if (j == ncols && ncols < A->cmap->N - n) {
2516         /* a hole is outside compressed Bcols */
2517         if (ncols == 0) {
2518           if (cstart) {
2519             offdiagIdx[r] = 0;
2520           } else offdiagIdx[r] = cend;
2521         } else { /* ncols > 0 */
2522           offdiagIdx[r] = cmap[ncols - 1] + 1;
2523           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2524         }
2525       }
2526     }
2527 
2528     for (j = 0; j < ncols; j++) {
2529       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2530         offdiagA[r]   = *ba;
2531         offdiagIdx[r] = cmap[*bj];
2532       }
2533       ba++;
2534       bj++;
2535     }
2536   }
2537 
2538   PetscCall(VecGetArrayWrite(v, &a));
2539   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2540   for (r = 0; r < m; ++r) {
2541     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2542       a[r] = diagA[r];
2543       if (idx) idx[r] = cstart + diagIdx[r];
2544     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2545       a[r] = diagA[r];
2546       if (idx) {
2547         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2548           idx[r] = cstart + diagIdx[r];
2549         } else idx[r] = offdiagIdx[r];
2550       }
2551     } else {
2552       a[r] = offdiagA[r];
2553       if (idx) idx[r] = offdiagIdx[r];
2554     }
2555   }
2556   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2557   PetscCall(VecRestoreArrayWrite(v, &a));
2558   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2559   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2560   PetscCall(VecDestroy(&diagV));
2561   PetscCall(VecDestroy(&offdiagV));
2562   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2563   PetscFunctionReturn(PETSC_SUCCESS);
2564 }
2565 
2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2567 {
2568   Mat *dummy;
2569 
2570   PetscFunctionBegin;
2571   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2572   *newmat = *dummy;
2573   PetscCall(PetscFree(dummy));
2574   PetscFunctionReturn(PETSC_SUCCESS);
2575 }
2576 
2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2578 {
2579   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2580 
2581   PetscFunctionBegin;
2582   PetscCall(MatInvertBlockDiagonal(a->A, values));
2583   A->factorerrortype = a->A->factorerrortype;
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2588 {
2589   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2593   PetscCall(MatSetRandom(aij->A, rctx));
2594   if (x->assembled) {
2595     PetscCall(MatSetRandom(aij->B, rctx));
2596   } else {
2597     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2598   }
2599   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2600   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2601   PetscFunctionReturn(PETSC_SUCCESS);
2602 }
2603 
2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2605 {
2606   PetscFunctionBegin;
2607   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2608   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2609   PetscFunctionReturn(PETSC_SUCCESS);
2610 }
2611 
2612 /*@
2613   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2614 
2615   Not Collective
2616 
2617   Input Parameter:
2618 . A - the matrix
2619 
2620   Output Parameter:
2621 . nz - the number of nonzeros
2622 
2623   Level: advanced
2624 
2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2626 @*/
2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2628 {
2629   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2630   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2631   PetscBool   isaij;
2632 
2633   PetscFunctionBegin;
2634   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2635   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2636   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2637   PetscFunctionReturn(PETSC_SUCCESS);
2638 }
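
/*
  Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix): the returned count
  covers both the diagonal and off-diagonal blocks owned by this rank.

     PetscCount nz;
     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
     PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %lld\n", (long long)nz));
*/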
2639 
2640 /*@
2641   MatMPIAIJSetUseScalableIncreaseOverlap - Sets whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2642 
2643   Collective
2644 
2645   Input Parameters:
2646 + A  - the matrix
2647 - sc - `PETSC_TRUE` to use the scalable algorithm; the default is `PETSC_FALSE`
2648 
2649   Level: advanced
2650 
2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2652 @*/
2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2654 {
2655   PetscFunctionBegin;
2656   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2657   PetscFunctionReturn(PETSC_SUCCESS);
2658 }
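
/*
  Example usage (a minimal sketch, assuming A is a MATMPIAIJ matrix later used with an overlapping
  additive Schwarz preconditioner): switch MatIncreaseOverlap() to the scalable algorithm, either
  programmatically or from the options database.

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));

  Equivalently, run with -mat_increase_overlap_scalable, which is handled in
  MatSetFromOptions_MPIAIJ() below.
*/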
2659 
2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2661 {
2662   PetscBool sc = PETSC_FALSE, flg;
2663 
2664   PetscFunctionBegin;
2665   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2666   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2667   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2668   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2669   PetscOptionsHeadEnd();
2670   PetscFunctionReturn(PETSC_SUCCESS);
2671 }
2672 
2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2674 {
2675   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2676   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2677 
2678   PetscFunctionBegin;
2679   if (!Y->preallocated) {
2680     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2681   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2682     PetscInt nonew = aij->nonew;
2683     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2684     aij->nonew = nonew;
2685   }
2686   PetscCall(MatShift_Basic(Y, a));
2687   PetscFunctionReturn(PETSC_SUCCESS);
2688 }
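
/*
  Example usage (a minimal sketch, assuming Y is a MATMPIAIJ matrix and alpha a hypothetical shift
  value): MatShift() adds alpha to every diagonal entry; as handled above, the diagonal is
  preallocated first if Y has no nonzeros yet.

     PetscScalar alpha = 1.0e-8;   // hypothetical regularization value
     PetscCall(MatShift(Y, alpha));
*/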
2689 
2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2691 {
2692   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2693 
2694   PetscFunctionBegin;
2695   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2696   PetscCall(MatMissingDiagonal(a->A, missing, d));
2697   if (d) {
2698     PetscInt rstart;
2699     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2700     *d += rstart;
2701   }
2702   PetscFunctionReturn(PETSC_SUCCESS);
2703 }
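
/*
  Example usage (a minimal sketch, assuming A is an assembled square MATMPIAIJ matrix):
  MatMissingDiagonal() reports whether some locally owned row has no diagonal entry and, if so,
  the global index of the first such row.

     PetscBool missing;
     PetscInt  row;
     PetscCall(MatMissingDiagonal(A, &missing, &row));
     if (missing) PetscCall(PetscPrintf(PETSC_COMM_SELF, "row %" PetscInt_FMT " has no diagonal entry\n", row));
*/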
2704 
2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2708 
2709   PetscFunctionBegin;
2710   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2711   PetscFunctionReturn(PETSC_SUCCESS);
2712 }
2713 
2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2717 
2718   PetscFunctionBegin;
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2720   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2721   PetscFunctionReturn(PETSC_SUCCESS);
2722 }
2723 
2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2725                                        MatGetRow_MPIAIJ,
2726                                        MatRestoreRow_MPIAIJ,
2727                                        MatMult_MPIAIJ,
2728                                        /* 4*/ MatMultAdd_MPIAIJ,
2729                                        MatMultTranspose_MPIAIJ,
2730                                        MatMultTransposeAdd_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                        /*10*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        MatSOR_MPIAIJ,
2738                                        MatTranspose_MPIAIJ,
2739                                        /*15*/ MatGetInfo_MPIAIJ,
2740                                        MatEqual_MPIAIJ,
2741                                        MatGetDiagonal_MPIAIJ,
2742                                        MatDiagonalScale_MPIAIJ,
2743                                        MatNorm_MPIAIJ,
2744                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2745                                        MatAssemblyEnd_MPIAIJ,
2746                                        MatSetOption_MPIAIJ,
2747                                        MatZeroEntries_MPIAIJ,
2748                                        /*24*/ MatZeroRows_MPIAIJ,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        /*29*/ MatSetUp_MPI_Hash,
2754                                        NULL,
2755                                        NULL,
2756                                        MatGetDiagonalBlock_MPIAIJ,
2757                                        NULL,
2758                                        /*34*/ MatDuplicate_MPIAIJ,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        /*39*/ MatAXPY_MPIAIJ,
2764                                        MatCreateSubMatrices_MPIAIJ,
2765                                        MatIncreaseOverlap_MPIAIJ,
2766                                        MatGetValues_MPIAIJ,
2767                                        MatCopy_MPIAIJ,
2768                                        /*44*/ MatGetRowMax_MPIAIJ,
2769                                        MatScale_MPIAIJ,
2770                                        MatShift_MPIAIJ,
2771                                        MatDiagonalSet_MPIAIJ,
2772                                        MatZeroRowsColumns_MPIAIJ,
2773                                        /*49*/ MatSetRandom_MPIAIJ,
2774                                        MatGetRowIJ_MPIAIJ,
2775                                        MatRestoreRowIJ_MPIAIJ,
2776                                        NULL,
2777                                        NULL,
2778                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2779                                        NULL,
2780                                        MatSetUnfactored_MPIAIJ,
2781                                        MatPermute_MPIAIJ,
2782                                        NULL,
2783                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2784                                        MatDestroy_MPIAIJ,
2785                                        MatView_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        /*64*/ NULL,
2789                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2790                                        NULL,
2791                                        NULL,
2792                                        NULL,
2793                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2794                                        MatGetRowMinAbs_MPIAIJ,
2795                                        NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        NULL,
2799                                        /*75*/ MatFDColoringApply_AIJ,
2800                                        MatSetFromOptions_MPIAIJ,
2801                                        NULL,
2802                                        NULL,
2803                                        MatFindZeroDiagonals_MPIAIJ,
2804                                        /*80*/ NULL,
2805                                        NULL,
2806                                        NULL,
2807                                        /*83*/ MatLoad_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        NULL,
2811                                        NULL,
2812                                        NULL,
2813                                        /*89*/ NULL,
2814                                        NULL,
2815                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2819                                        NULL,
2820                                        NULL,
2821                                        NULL,
2822                                        MatBindToCPU_MPIAIJ,
2823                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2824                                        NULL,
2825                                        NULL,
2826                                        MatConjugate_MPIAIJ,
2827                                        NULL,
2828                                        /*104*/ MatSetValuesRow_MPIAIJ,
2829                                        MatRealPart_MPIAIJ,
2830                                        MatImaginaryPart_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        /*109*/ NULL,
2834                                        NULL,
2835                                        MatGetRowMin_MPIAIJ,
2836                                        NULL,
2837                                        MatMissingDiagonal_MPIAIJ,
2838                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2839                                        NULL,
2840                                        MatGetGhosts_MPIAIJ,
2841                                        NULL,
2842                                        NULL,
2843                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2844                                        NULL,
2845                                        NULL,
2846                                        NULL,
2847                                        MatGetMultiProcBlock_MPIAIJ,
2848                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2849                                        MatGetColumnReductions_MPIAIJ,
2850                                        MatInvertBlockDiagonal_MPIAIJ,
2851                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2852                                        MatCreateSubMatricesMPI_MPIAIJ,
2853                                        /*129*/ NULL,
2854                                        NULL,
2855                                        NULL,
2856                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2857                                        NULL,
2858                                        /*134*/ NULL,
2859                                        NULL,
2860                                        NULL,
2861                                        NULL,
2862                                        NULL,
2863                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2864                                        NULL,
2865                                        NULL,
2866                                        MatFDColoringSetUp_MPIXAIJ,
2867                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2868                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2869                                        /*145*/ NULL,
2870                                        NULL,
2871                                        NULL,
2872                                        MatCreateGraph_Simple_AIJ,
2873                                        NULL,
2874                                        /*150*/ NULL,
2875                                        MatEliminateZeros_MPIAIJ,
2876                                        MatGetRowSumAbs_MPIAIJ,
2877                                        NULL,
2878                                        NULL,
2879                                        /*155*/ NULL,
2880                                        MatCopyHashToXAIJ_MPI_Hash};
2881 
2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2883 {
2884   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2885 
2886   PetscFunctionBegin;
2887   PetscCall(MatStoreValues(aij->A));
2888   PetscCall(MatStoreValues(aij->B));
2889   PetscFunctionReturn(PETSC_SUCCESS);
2890 }
2891 
2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2893 {
2894   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2895 
2896   PetscFunctionBegin;
2897   PetscCall(MatRetrieveValues(aij->A));
2898   PetscCall(MatRetrieveValues(aij->B));
2899   PetscFunctionReturn(PETSC_SUCCESS);
2900 }
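
/*
  Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix whose nonzero
  pattern will not change): MatStoreValues()/MatRetrieveValues() save and restore the numerical
  values of both the diagonal and off-diagonal blocks, which is useful when the same sparsity
  pattern is refilled repeatedly.

     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATIONS, PETSC_FALSE));
     PetscCall(MatStoreValues(A));
     // ... overwrite values, e.g. inside a nonlinear or time-stepping loop ...
     PetscCall(MatRetrieveValues(A));
*/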
2901 
2902 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2903 {
2904   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2905   PetscMPIInt size;
2906 
2907   PetscFunctionBegin;
2908   if (B->hash_active) {
2909     B->ops[0]      = b->cops;
2910     B->hash_active = PETSC_FALSE;
2911   }
2912   PetscCall(PetscLayoutSetUp(B->rmap));
2913   PetscCall(PetscLayoutSetUp(B->cmap));
2914 
2915 #if defined(PETSC_USE_CTABLE)
2916   PetscCall(PetscHMapIDestroy(&b->colmap));
2917 #else
2918   PetscCall(PetscFree(b->colmap));
2919 #endif
2920   PetscCall(PetscFree(b->garray));
2921   PetscCall(VecDestroy(&b->lvec));
2922   PetscCall(VecScatterDestroy(&b->Mvctx));
2923 
2924   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2925 
2926   MatSeqXAIJGetOptions_Private(b->B);
2927   PetscCall(MatDestroy(&b->B));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2929   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2930   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2931   PetscCall(MatSetType(b->B, MATSEQAIJ));
2932   MatSeqXAIJRestoreOptions_Private(b->B);
2933 
2934   MatSeqXAIJGetOptions_Private(b->A);
2935   PetscCall(MatDestroy(&b->A));
2936   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2937   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2938   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2939   PetscCall(MatSetType(b->A, MATSEQAIJ));
2940   MatSeqXAIJRestoreOptions_Private(b->A);
2941 
2942   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2943   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2944   B->preallocated  = PETSC_TRUE;
2945   B->was_assembled = PETSC_FALSE;
2946   B->assembled     = PETSC_FALSE;
2947   PetscFunctionReturn(PETSC_SUCCESS);
2948 }
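
/*
  Example usage (a minimal sketch, assuming A is a MATMPIAIJ matrix with its sizes already set):
  preallocate roughly 5 nonzeros per row in the diagonal block and 2 in the off-diagonal block.

     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));

  With exact per-row counts (hypothetical arrays d_nnz and o_nnz, each of length equal to the local
  number of rows), the scalar arguments are ignored:

     PetscCall(MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz));
*/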
2949 
2950 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2951 {
2952   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2953 
2954   PetscFunctionBegin;
2955   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2956   PetscCall(PetscLayoutSetUp(B->rmap));
2957   PetscCall(PetscLayoutSetUp(B->cmap));
2958   if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2959   else {
2960 #if defined(PETSC_USE_CTABLE)
2961     PetscCall(PetscHMapIDestroy(&b->colmap));
2962 #else
2963     PetscCall(PetscFree(b->colmap));
2964 #endif
2965     PetscCall(PetscFree(b->garray));
2966     PetscCall(VecDestroy(&b->lvec));
2967   }
2968   PetscCall(VecScatterDestroy(&b->Mvctx));
2969 
2970   PetscCall(MatResetPreallocation(b->A));
2971   PetscCall(MatResetPreallocation(b->B));
2972   B->preallocated  = PETSC_TRUE;
2973   B->was_assembled = PETSC_FALSE;
2974   B->assembled     = PETSC_FALSE;
2975   PetscFunctionReturn(PETSC_SUCCESS);
2976 }
2977 
2978 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2979 {
2980   Mat         mat;
2981   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2982 
2983   PetscFunctionBegin;
2984   *newmat = NULL;
2985   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2986   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2987   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2988   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2989   a = (Mat_MPIAIJ *)mat->data;
2990 
2991   mat->factortype = matin->factortype;
2992   mat->assembled  = matin->assembled;
2993   mat->insertmode = NOT_SET_VALUES;
2994 
2995   a->size         = oldmat->size;
2996   a->rank         = oldmat->rank;
2997   a->donotstash   = oldmat->donotstash;
2998   a->roworiented  = oldmat->roworiented;
2999   a->rowindices   = NULL;
3000   a->rowvalues    = NULL;
3001   a->getrowactive = PETSC_FALSE;
3002 
3003   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3004   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3005   if (matin->hash_active) {
3006     PetscCall(MatSetUp(mat));
3007   } else {
3008     mat->preallocated = matin->preallocated;
3009     if (oldmat->colmap) {
3010 #if defined(PETSC_USE_CTABLE)
3011       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3012 #else
3013       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3014       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3015 #endif
3016     } else a->colmap = NULL;
3017     if (oldmat->garray) {
3018       PetscInt len;
3019       len = oldmat->B->cmap->n;
3020       PetscCall(PetscMalloc1(len + 1, &a->garray));
3021       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3022     } else a->garray = NULL;
3023 
3024     /* MatDuplicate() may be called with a non-assembled matrix;
3025       it only requires the matrix to be preallocated.
3026       This can happen, for example, inside a DMCreateMatrix_Shell */
3027     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3028     if (oldmat->Mvctx) {
3029       a->Mvctx = oldmat->Mvctx;
3030       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3031     }
3032     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3033     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3034   }
3035   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3036   *newmat = mat;
3037   PetscFunctionReturn(PETSC_SUCCESS);
3038 }
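
/*
  Example usage (a minimal sketch, assuming A is a preallocated or assembled MATMPIAIJ matrix):
  MatDuplicate() creates a new matrix with the same layout, optionally copying the values.

     Mat B;
     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));   // or MAT_DO_NOT_COPY_VALUES / MAT_SHARE_NONZERO_PATTERN
     // ... use B ...
     PetscCall(MatDestroy(&B));
*/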
3039 
3040 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3041 {
3042   PetscBool isbinary, ishdf5;
3043 
3044   PetscFunctionBegin;
3045   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3046   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3047   /* force binary viewer to load .info file if it has not yet done so */
3048   PetscCall(PetscViewerSetUp(viewer));
3049   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3050   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3051   if (isbinary) {
3052     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3053   } else if (ishdf5) {
3054 #if defined(PETSC_HAVE_HDF5)
3055     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3056 #else
3057     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3058 #endif
3059   } else {
3060     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3061   }
3062   PetscFunctionReturn(PETSC_SUCCESS);
3063 }
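
/*
  Example usage (a minimal sketch, assuming "matrix.dat" is a hypothetical file previously written
  with MatView() and a binary viewer): MatLoad() dispatches to MatLoad_MPIAIJ_Binary() below for
  PETSCVIEWERBINARY, and to the HDF5 reader when PETSc is built with HDF5.

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/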
3064 
3065 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3066 {
3067   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3068   PetscInt    *rowidxs, *colidxs;
3069   PetscScalar *matvals;
3070 
3071   PetscFunctionBegin;
3072   PetscCall(PetscViewerSetUp(viewer));
3073 
3074   /* read in matrix header */
3075   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3076   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3077   M  = header[1];
3078   N  = header[2];
3079   nz = header[3];
3080   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3081   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3082   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3083 
3084   /* set block sizes from the viewer's .info file */
3085   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3086   /* set global sizes if not set already */
3087   if (mat->rmap->N < 0) mat->rmap->N = M;
3088   if (mat->cmap->N < 0) mat->cmap->N = N;
3089   PetscCall(PetscLayoutSetUp(mat->rmap));
3090   PetscCall(PetscLayoutSetUp(mat->cmap));
3091 
3092   /* check if the matrix sizes are correct */
3093   PetscCall(MatGetSize(mat, &rows, &cols));
3094   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3095 
3096   /* read in row lengths and build row indices */
3097   PetscCall(MatGetLocalSize(mat, &m, NULL));
3098   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3099   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3100   rowidxs[0] = 0;
3101   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3102   if (nz != PETSC_INT_MAX) {
3103     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3104     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3105   }
3106 
3107   /* read in column indices and matrix values */
3108   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3109   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3110   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3111   /* store matrix indices and values */
3112   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3113   PetscCall(PetscFree(rowidxs));
3114   PetscCall(PetscFree2(colidxs, matvals));
3115   PetscFunctionReturn(PETSC_SUCCESS);
3116 }
3117 
3118 /* Not scalable because of ISAllGather() unless getting all columns. */
3119 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3120 {
3121   IS          iscol_local;
3122   PetscBool   isstride;
3123   PetscMPIInt gisstride = 0;
3124 
3125   PetscFunctionBegin;
3126   /* check if we are grabbing all columns */
3127   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3128 
3129   if (isstride) {
3130     PetscInt start, len, mstart, mlen;
3131     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3132     PetscCall(ISGetLocalSize(iscol, &len));
3133     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3134     if (mstart == start && mlen - mstart == len) gisstride = 1;
3135   }
3136 
3137   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3138   if (gisstride) {
3139     PetscInt N;
3140     PetscCall(MatGetSize(mat, NULL, &N));
3141     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3142     PetscCall(ISSetIdentity(iscol_local));
3143     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3144   } else {
3145     PetscInt cbs;
3146     PetscCall(ISGetBlockSize(iscol, &cbs));
3147     PetscCall(ISAllGather(iscol, &iscol_local));
3148     PetscCall(ISSetBlockSize(iscol_local, cbs));
3149   }
3150 
3151   *isseq = iscol_local;
3152   PetscFunctionReturn(PETSC_SUCCESS);
3153 }
3154 
3155 /*
3156  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and building an iscol_local of global size
3157  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3158 
3159  Input Parameters:
3160 +   mat - matrix
3161 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3162            i.e., mat->rstart <= isrow[i] < mat->rend
3163 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3164            i.e., mat->cstart <= iscol[i] < mat->cend
3165 
3166  Output Parameters:
3167 +   isrow_d - sequential row index set for retrieving mat->A
3168 .   iscol_d - sequential column index set for retrieving mat->A
3169 .   iscol_o - sequential column index set for retrieving mat->B
3170 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3171  */
3172 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3173 {
3174   Vec             x, cmap;
3175   const PetscInt *is_idx;
3176   PetscScalar    *xarray, *cmaparray;
3177   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3178   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3179   Mat             B    = a->B;
3180   Vec             lvec = a->lvec, lcmap;
3181   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3182   MPI_Comm        comm;
3183   VecScatter      Mvctx = a->Mvctx;
3184 
3185   PetscFunctionBegin;
3186   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3187   PetscCall(ISGetLocalSize(iscol, &ncols));
3188 
3189   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3190   PetscCall(MatCreateVecs(mat, &x, NULL));
3191   PetscCall(VecSet(x, -1.0));
3192   PetscCall(VecDuplicate(x, &cmap));
3193   PetscCall(VecSet(cmap, -1.0));
3194 
3195   /* Get start indices */
3196   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3197   isstart -= ncols;
3198   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3199 
3200   PetscCall(ISGetIndices(iscol, &is_idx));
3201   PetscCall(VecGetArray(x, &xarray));
3202   PetscCall(VecGetArray(cmap, &cmaparray));
3203   PetscCall(PetscMalloc1(ncols, &idx));
3204   for (i = 0; i < ncols; i++) {
3205     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3206     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3207     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3208   }
3209   PetscCall(VecRestoreArray(x, &xarray));
3210   PetscCall(VecRestoreArray(cmap, &cmaparray));
3211   PetscCall(ISRestoreIndices(iscol, &is_idx));
3212 
3213   /* Get iscol_d */
3214   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3215   PetscCall(ISGetBlockSize(iscol, &i));
3216   PetscCall(ISSetBlockSize(*iscol_d, i));
3217 
3218   /* Get isrow_d */
3219   PetscCall(ISGetLocalSize(isrow, &m));
3220   rstart = mat->rmap->rstart;
3221   PetscCall(PetscMalloc1(m, &idx));
3222   PetscCall(ISGetIndices(isrow, &is_idx));
3223   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3224   PetscCall(ISRestoreIndices(isrow, &is_idx));
3225 
3226   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3227   PetscCall(ISGetBlockSize(isrow, &i));
3228   PetscCall(ISSetBlockSize(*isrow_d, i));
3229 
3230   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3231   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3232   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3233 
3234   PetscCall(VecDuplicate(lvec, &lcmap));
3235 
3236   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3237   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3238 
3239   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3240   /* off-process column indices */
3241   count = 0;
3242   PetscCall(PetscMalloc1(Bn, &idx));
3243   PetscCall(PetscMalloc1(Bn, &cmap1));
3244 
3245   PetscCall(VecGetArray(lvec, &xarray));
3246   PetscCall(VecGetArray(lcmap, &cmaparray));
3247   for (i = 0; i < Bn; i++) {
3248     if (PetscRealPart(xarray[i]) > -1.0) {
3249       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3250       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3251       count++;
3252     }
3253   }
3254   PetscCall(VecRestoreArray(lvec, &xarray));
3255   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3256 
3257   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3258   /* cannot ensure iscol_o has same blocksize as iscol! */
3259 
3260   PetscCall(PetscFree(idx));
3261   *garray = cmap1;
3262 
3263   PetscCall(VecDestroy(&x));
3264   PetscCall(VecDestroy(&cmap));
3265   PetscCall(VecDestroy(&lcmap));
3266   PetscFunctionReturn(PETSC_SUCCESS);
3267 }
3268 
3269 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3270 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3271 {
3272   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3273   Mat         M = NULL;
3274   MPI_Comm    comm;
3275   IS          iscol_d, isrow_d, iscol_o;
3276   Mat         Asub = NULL, Bsub = NULL;
3277   PetscInt    n, count, M_size, N_size;
3278 
3279   PetscFunctionBegin;
3280   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3281 
3282   if (call == MAT_REUSE_MATRIX) {
3283     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3284     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3285     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3286 
3287     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3288     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3289 
3290     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3291     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3292 
3293     /* Update diagonal and off-diagonal portions of submat */
3294     asub = (Mat_MPIAIJ *)(*submat)->data;
3295     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3296     PetscCall(ISGetLocalSize(iscol_o, &n));
3297     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3298     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3299     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3300 
3301   } else { /* call == MAT_INITIAL_MATRIX) */
3302     PetscInt *garray, *garray_compact;
3303     PetscInt  BsubN;
3304 
3305     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3306     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3307 
3308     /* Create local submatrices Asub and Bsub */
3309     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3310     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3311 
3312     // Compact garray so it is not of size Bn
3313     PetscCall(ISGetSize(iscol_o, &count));
3314     PetscCall(PetscMalloc1(count, &garray_compact));
3315     PetscCall(PetscArraycpy(garray_compact, garray, count));
3316 
3317     /* Create submatrix M */
3318     PetscCall(ISGetSize(isrow, &M_size));
3319     PetscCall(ISGetSize(iscol, &N_size));
3320     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3321 
3322     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3323     asub = (Mat_MPIAIJ *)M->data;
3324 
3325     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3326     n = asub->B->cmap->N;
3327     if (BsubN > n) {
3328       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3329       const PetscInt *idx;
3330       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3331       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3332 
3333       PetscCall(PetscMalloc1(n, &idx_new));
3334       j = 0;
3335       PetscCall(ISGetIndices(iscol_o, &idx));
3336       for (i = 0; i < n; i++) {
3337         if (j >= BsubN) break;
3338         while (subgarray[i] > garray[j]) j++;
3339 
3340         if (subgarray[i] == garray[j]) {
3341           idx_new[i] = idx[j++];
3342         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be smaller than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3343       }
3344       PetscCall(ISRestoreIndices(iscol_o, &idx));
3345 
3346       PetscCall(ISDestroy(&iscol_o));
3347       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3348 
3349     } else if (BsubN < n) {
3350       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3351     }
3352 
3353     PetscCall(PetscFree(garray));
3354     *submat = M;
3355 
3356     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3357     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3358     PetscCall(ISDestroy(&isrow_d));
3359 
3360     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3361     PetscCall(ISDestroy(&iscol_d));
3362 
3363     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3364     PetscCall(ISDestroy(&iscol_o));
3365   }
3366   PetscFunctionReturn(PETSC_SUCCESS);
3367 }
3368 
3369 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3370 {
3371   IS        iscol_local = NULL, isrow_d;
3372   PetscInt  csize;
3373   PetscInt  n, i, j, start, end;
3374   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3375   MPI_Comm  comm;
3376 
3377   PetscFunctionBegin;
3378   /* If isrow has same processor distribution as mat,
3379      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3380   if (call == MAT_REUSE_MATRIX) {
3381     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3382     if (isrow_d) {
3383       sameRowDist  = PETSC_TRUE;
3384       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3385     } else {
3386       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3387       if (iscol_local) {
3388         sameRowDist  = PETSC_TRUE;
3389         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3390       }
3391     }
3392   } else {
3393     /* Check if isrow has same processor distribution as mat */
3394     sameDist[0] = PETSC_FALSE;
3395     PetscCall(ISGetLocalSize(isrow, &n));
3396     if (!n) {
3397       sameDist[0] = PETSC_TRUE;
3398     } else {
3399       PetscCall(ISGetMinMax(isrow, &i, &j));
3400       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3401       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3402     }
3403 
3404     /* Check if iscol has same processor distribution as mat */
3405     sameDist[1] = PETSC_FALSE;
3406     PetscCall(ISGetLocalSize(iscol, &n));
3407     if (!n) {
3408       sameDist[1] = PETSC_TRUE;
3409     } else {
3410       PetscCall(ISGetMinMax(iscol, &i, &j));
3411       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3412       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3413     }
3414 
3415     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3416     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3417     sameRowDist = tsameDist[0];
3418   }
3419 
3420   if (sameRowDist) {
3421     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3422       /* isrow and iscol have same processor distribution as mat */
3423       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3424       PetscFunctionReturn(PETSC_SUCCESS);
3425     } else { /* sameRowDist */
3426       /* isrow has same processor distribution as mat */
3427       if (call == MAT_INITIAL_MATRIX) {
3428         PetscBool sorted;
3429         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3430         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3431         PetscCall(ISGetSize(iscol, &i));
3432         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3433 
3434         PetscCall(ISSorted(iscol_local, &sorted));
3435         if (sorted) {
3436           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3437           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3438           PetscFunctionReturn(PETSC_SUCCESS);
3439         }
3440       } else { /* call == MAT_REUSE_MATRIX */
3441         IS iscol_sub;
3442         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3443         if (iscol_sub) {
3444           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3445           PetscFunctionReturn(PETSC_SUCCESS);
3446         }
3447       }
3448     }
3449   }
3450 
3451   /* General case: iscol -> iscol_local which has global size of iscol */
3452   if (call == MAT_REUSE_MATRIX) {
3453     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3454     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3455   } else {
3456     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3457   }
3458 
3459   PetscCall(ISGetLocalSize(iscol, &csize));
3460   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3461 
3462   if (call == MAT_INITIAL_MATRIX) {
3463     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3464     PetscCall(ISDestroy(&iscol_local));
3465   }
3466   PetscFunctionReturn(PETSC_SUCCESS);
3467 }
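
/*
  Example usage (a minimal sketch, assuming A is an assembled MATMPIAIJ matrix and that each rank
  extracts its locally owned rows and a contiguous block of columns): MatCreateSubMatrix() reaches
  the dispatcher above, which picks the SameRowColDist, SameRowDist, or nonscalable path depending
  on how the index sets are distributed.

     IS       isrow, iscol;
     Mat      Asub;
     PetscInt rstart, rend, cstart, cend;
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, cend - cstart, cstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &Asub));
     // ... after A's values change, the extraction can be repeated in place ...
     PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &Asub));
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscCall(MatDestroy(&Asub));
*/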
3468 
3469 /*@C
3470   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3471   and "off-diagonal" part of the matrix in CSR format.
3472 
3473   Collective
3474 
3475   Input Parameters:
3476 + comm   - MPI communicator
3477 . M      - the global row size
3478 . N      - the global column size
3479 . A      - "diagonal" portion of matrix
3480 . B      - the "off-diagonal" portion; if garray is `NULL`, B must use global column ids and have N columns, otherwise B must use local column ids and have as many columns as entries in garray
3481 - garray - either `NULL` or the global index of `B` columns
3482 
3483   Output Parameter:
3484 . mat - the matrix, with input `A` as its local diagonal matrix
3485 
3486   Level: advanced
3487 
3488   Notes:
3489   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3490 
3491   `A` and `B` become part of the output `mat`; the caller must not use `A` and `B` afterwards.
3492 
3493 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3494 @*/
3495 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3496 {
3497   PetscInt m, n;
3498   MatType  mpi_mat_type;
3499 
3500   PetscFunctionBegin;
3501   PetscCall(MatCreate(comm, mat));
3502   PetscCall(MatGetSize(A, &m, &n));
3503   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3504   PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3505 
3506   PetscCall(MatSetSizes(*mat, m, n, M, N));
3507   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3508   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3509   PetscCall(MatSetType(*mat, mpi_mat_type));
3510 
3511   if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3512 
3513   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3514   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3515   PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray));
3516   PetscFunctionReturn(PETSC_SUCCESS);
3517 }
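
/*
  Example usage (a minimal sketch, assuming m x n is the local size, M x N the global size, and
  that each rank has assembled its two SEQAIJ blocks before the call): with garray == NULL, the
  off-diagonal block carries global column ids and N columns; ownership of both blocks passes to C.

     Mat Aloc, Bloc, C;
     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, m, n, 3, NULL, &Aloc));   // "diagonal" block, local column ids
     PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, m, N, 2, NULL, &Bloc));   // "off-diagonal" block, global column ids
     // ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() on Aloc and Bloc ...
     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, Aloc, Bloc, NULL, &C));
*/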
3518 
3519 /*
3520   MatSetMPIAIJWithSplitSeqAIJ - Sets the diag and offdiag matrices of a `MATMPIAIJ` matrix.
3521    It is similar to `MatCreateMPIAIJWithSplitArrays()`. This routine allows passing in
3522    B with local indices and the correct size, along with the accompanying
3523    garray, hence skipping the compactification step.
3524 
3525   Collective
3526 
3527   Input Parameters:
3528 +  mat    - the MATMPIAIJ matrix, which should have its type and layout set, but should not have its diag, offdiag matrices set
3529 .  A      - the diag matrix using local col ids
3530 .  B      - the offdiag matrix; if garray is `NULL`, B must use global col ids and have N columns, otherwise B must use local col ids and have as many columns as entries in garray
3531 -  garray - either `NULL` or the global index of `B` columns
3532 
3533   Output Parameter:
3534 .  mat   - the updated `MATMPIAIJ` matrix
3535 
3536   Level: advanced
3537 
3538   Notes:
3539   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3540 
3541   `A` and `B` become part of the output `mat`; the caller must not use `A` and `B` afterwards.
3542 
3543 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3544 */
3545 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray)
3546 {
3547   PetscFunctionBegin;
3548   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
3549   PetscInt    m, n, M, N, Am, An, Bm, Bn;
3550 
3551   PetscCall(MatGetSize(mat, &M, &N));
3552   PetscCall(MatGetLocalSize(mat, &m, &n));
3553   PetscCall(MatGetLocalSize(A, &Am, &An));
3554   PetscCall(MatGetLocalSize(B, &Bm, &Bn));
3555 
3556   PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Local numbers of rows of mat, A, and B do not match");
3557   PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Local numbers of columns of mat and A do not match");
3558   PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty");
3559   mpiaij->A      = A;
3560   mpiaij->B      = B;
3561   mpiaij->garray = garray;
3562 
3563   mat->preallocated     = PETSC_TRUE;
3564   mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ; in effect this makes MatAssemblyBegin a no-op */
3565 
3566   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3567   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
3568   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to those of A and B, and
3569    also compacts mpiaij->B (if garray is NULL), reducing its col ids and size
3570    */
3571   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3572   PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3573   PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3574   PetscFunctionReturn(PETSC_SUCCESS);
3575 }
3576 
3577 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3578 
3579 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3580 {
3581   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3582   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3583   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3584   Mat             M, Msub, B = a->B;
3585   MatScalar      *aa;
3586   Mat_SeqAIJ     *aij;
3587   PetscInt       *garray = a->garray, *colsub, Ncols;
3588   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3589   IS              iscol_sub, iscmap;
3590   const PetscInt *is_idx, *cmap;
3591   PetscBool       allcolumns = PETSC_FALSE;
3592   MPI_Comm        comm;
3593 
3594   PetscFunctionBegin;
3595   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3596   if (call == MAT_REUSE_MATRIX) {
3597     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3598     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3599     PetscCall(ISGetLocalSize(iscol_sub, &count));
3600 
3601     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3602     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3603 
3604     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3605     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3606 
3607     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3608 
3609   } else { /* call == MAT_INITIAL_MATRIX) */
3610     PetscBool flg;
3611 
3612     PetscCall(ISGetLocalSize(iscol, &n));
3613     PetscCall(ISGetSize(iscol, &Ncols));
3614 
3615     /* (1) iscol -> nonscalable iscol_local */
3616     /* Check for special case: each processor gets entire matrix columns */
3617     PetscCall(ISIdentity(iscol_local, &flg));
3618     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3619     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3620     if (allcolumns) {
3621       iscol_sub = iscol_local;
3622       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3623       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3624 
3625     } else {
3626       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3627       PetscInt *idx, *cmap1, k;
3628       PetscCall(PetscMalloc1(Ncols, &idx));
3629       PetscCall(PetscMalloc1(Ncols, &cmap1));
3630       PetscCall(ISGetIndices(iscol_local, &is_idx));
3631       count = 0;
3632       k     = 0;
3633       for (i = 0; i < Ncols; i++) {
3634         j = is_idx[i];
3635         if (j >= cstart && j < cend) {
3636           /* diagonal part of mat */
3637           idx[count]     = j;
3638           cmap1[count++] = i; /* column index in submat */
3639         } else if (Bn) {
3640           /* off-diagonal part of mat */
3641           if (j == garray[k]) {
3642             idx[count]     = j;
3643             cmap1[count++] = i; /* column index in submat */
3644           } else if (j > garray[k]) {
3645             while (j > garray[k] && k < Bn - 1) k++;
3646             if (j == garray[k]) {
3647               idx[count]     = j;
3648               cmap1[count++] = i; /* column index in submat */
3649             }
3650           }
3651         }
3652       }
3653       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3654 
3655       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3656       PetscCall(ISGetBlockSize(iscol, &cbs));
3657       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3658 
3659       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3660     }
3661 
3662     /* (3) Create sequential Msub */
3663     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3664   }
3665 
3666   PetscCall(ISGetLocalSize(iscol_sub, &count));
3667   aij = (Mat_SeqAIJ *)Msub->data;
3668   ii  = aij->i;
3669   PetscCall(ISGetIndices(iscmap, &cmap));
3670 
3671   /*
3672       m - number of local rows
3673       Ncols - number of columns (same on all processors)
3674       rstart - first row in new global matrix generated
3675   */
3676   PetscCall(MatGetSize(Msub, &m, NULL));
3677 
3678   if (call == MAT_INITIAL_MATRIX) {
3679     /* (4) Create parallel newmat */
3680     PetscMPIInt rank, size;
3681     PetscInt    csize;
3682 
3683     PetscCallMPI(MPI_Comm_size(comm, &size));
3684     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3685 
3686     /*
3687         Determine the number of non-zeros in the diagonal and off-diagonal
3688         portions of the matrix in order to do correct preallocation
3689     */
3690 
3691     /* first get start and end of "diagonal" columns */
3692     PetscCall(ISGetLocalSize(iscol, &csize));
3693     if (csize == PETSC_DECIDE) {
3694       PetscCall(ISGetSize(isrow, &mglobal));
3695       if (mglobal == Ncols) { /* square matrix */
3696         nlocal = m;
3697       } else {
3698         nlocal = Ncols / size + ((Ncols % size) > rank);
3699       }
3700     } else {
3701       nlocal = csize;
3702     }
3703     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3704     rstart = rend - nlocal;
3705     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3706 
3707     /* next, compute all the lengths */
3708     jj = aij->j;
3709     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3710     olens = dlens + m;
3711     for (i = 0; i < m; i++) {
3712       jend = ii[i + 1] - ii[i];
3713       olen = 0;
3714       dlen = 0;
3715       for (j = 0; j < jend; j++) {
3716         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3717         else dlen++;
3718         jj++;
3719       }
3720       olens[i] = olen;
3721       dlens[i] = dlen;
3722     }
3723 
3724     PetscCall(ISGetBlockSize(isrow, &bs));
3725     PetscCall(ISGetBlockSize(iscol, &cbs));
3726 
3727     PetscCall(MatCreate(comm, &M));
3728     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3729     PetscCall(MatSetBlockSizes(M, bs, cbs));
3730     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3731     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3732     PetscCall(PetscFree(dlens));
3733 
3734   } else { /* call == MAT_REUSE_MATRIX */
3735     M = *newmat;
3736     PetscCall(MatGetLocalSize(M, &i, NULL));
3737     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3738     PetscCall(MatZeroEntries(M));
3739     /*
3740          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3741        rather than the slower MatSetValues().
3742     */
3743     M->was_assembled = PETSC_TRUE;
3744     M->assembled     = PETSC_FALSE;
3745   }
3746 
3747   /* (5) Set values of Msub to *newmat */
3748   PetscCall(PetscMalloc1(count, &colsub));
3749   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3750 
3751   jj = aij->j;
3752   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3753   for (i = 0; i < m; i++) {
3754     row = rstart + i;
3755     nz  = ii[i + 1] - ii[i];
3756     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3757     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3758     jj += nz;
3759     aa += nz;
3760   }
3761   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3762   PetscCall(ISRestoreIndices(iscmap, &cmap));
3763 
3764   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3765   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3766 
3767   PetscCall(PetscFree(colsub));
3768 
3769   /* save Msub, iscol_sub and iscmap used in processor for next request */
3770   if (call == MAT_INITIAL_MATRIX) {
3771     *newmat = M;
3772     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3773     PetscCall(MatDestroy(&Msub));
3774 
3775     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3776     PetscCall(ISDestroy(&iscol_sub));
3777 
3778     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3779     PetscCall(ISDestroy(&iscmap));
3780 
3781     if (iscol_local) {
3782       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3783       PetscCall(ISDestroy(&iscol_local));
3784     }
3785   }
3786   PetscFunctionReturn(PETSC_SUCCESS);
3787 }
3788 
3789 /*
3790     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ copy
3791   on each process, and then the final result obtained by concatenating the local matrices.
3792   Writing it directly would look much like MatCreateSubMatrices_MPIAIJ()
3793 
3794   This requires a sequential iscol with all indices.
3795 */
3796 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3797 {
3798   PetscMPIInt rank, size;
3799   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3800   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3801   Mat         M, Mreuse;
3802   MatScalar  *aa, *vwork;
3803   MPI_Comm    comm;
3804   Mat_SeqAIJ *aij;
3805   PetscBool   colflag, allcolumns = PETSC_FALSE;
3806 
3807   PetscFunctionBegin;
3808   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3809   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3810   PetscCallMPI(MPI_Comm_size(comm, &size));
3811 
3812   /* Check for special case: each processor gets entire matrix columns */
3813   PetscCall(ISIdentity(iscol, &colflag));
3814   PetscCall(ISGetLocalSize(iscol, &n));
3815   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3816   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3817 
3818   if (call == MAT_REUSE_MATRIX) {
3819     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3820     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3821     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3822   } else {
3823     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3824   }
3825 
3826   /*
3827       m - number of local rows
3828       n - number of columns (same on all processors)
3829       rstart - first row in new global matrix generated
3830   */
3831   PetscCall(MatGetSize(Mreuse, &m, &n));
3832   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3833   if (call == MAT_INITIAL_MATRIX) {
3834     aij = (Mat_SeqAIJ *)Mreuse->data;
3835     ii  = aij->i;
3836     jj  = aij->j;
3837 
3838     /*
3839         Determine the number of non-zeros in the diagonal and off-diagonal
3840         portions of the matrix in order to do correct preallocation
3841     */
3842 
3843     /* first get start and end of "diagonal" columns */
3844     if (csize == PETSC_DECIDE) {
3845       PetscCall(ISGetSize(isrow, &mglobal));
3846       if (mglobal == n) { /* square matrix */
3847         nlocal = m;
3848       } else {
3849         nlocal = n / size + ((n % size) > rank);
3850       }
3851     } else {
3852       nlocal = csize;
3853     }
3854     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3855     rstart = rend - nlocal;
3856     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3857 
3858     /* next, compute all the lengths */
3859     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3860     olens = dlens + m;
3861     for (i = 0; i < m; i++) {
3862       jend = ii[i + 1] - ii[i];
3863       olen = 0;
3864       dlen = 0;
3865       for (j = 0; j < jend; j++) {
3866         if (*jj < rstart || *jj >= rend) olen++;
3867         else dlen++;
3868         jj++;
3869       }
3870       olens[i] = olen;
3871       dlens[i] = dlen;
3872     }
3873     PetscCall(MatCreate(comm, &M));
3874     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3875     PetscCall(MatSetBlockSizes(M, bs, cbs));
3876     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3877     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3878     PetscCall(PetscFree(dlens));
3879   } else {
3880     PetscInt ml, nl;
3881 
3882     M = *newmat;
3883     PetscCall(MatGetLocalSize(M, &ml, &nl));
3884     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3885     PetscCall(MatZeroEntries(M));
3886     /*
3887          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3888        rather than the slower MatSetValues().
3889     */
3890     M->was_assembled = PETSC_TRUE;
3891     M->assembled     = PETSC_FALSE;
3892   }
3893   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3894   aij = (Mat_SeqAIJ *)Mreuse->data;
3895   ii  = aij->i;
3896   jj  = aij->j;
3897 
3898   /* trigger copy to CPU if needed */
3899   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3900   for (i = 0; i < m; i++) {
3901     row   = rstart + i;
3902     nz    = ii[i + 1] - ii[i];
3903     cwork = jj;
3904     jj    = PetscSafePointerPlusOffset(jj, nz);
3905     vwork = aa;
3906     aa    = PetscSafePointerPlusOffset(aa, nz);
3907     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3908   }
3909   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3910 
3911   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3912   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3913   *newmat = M;
3914 
3915   /* save submatrix used in processor for next request */
3916   if (call == MAT_INITIAL_MATRIX) {
3917     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3918     PetscCall(MatDestroy(&Mreuse));
3919   }
3920   PetscFunctionReturn(PETSC_SUCCESS);
3921 }
3922 
3923 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3924 {
3925   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3926   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3927   const PetscInt *JJ;
3928   PetscBool       nooffprocentries;
3929   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3930 
3931   PetscFunctionBegin;
3932   PetscCall(PetscLayoutSetUp(B->rmap));
3933   PetscCall(PetscLayoutSetUp(B->cmap));
3934   m       = B->rmap->n;
3935   cstart  = B->cmap->rstart;
3936   cend    = B->cmap->rend;
3937   rstart  = B->rmap->rstart;
3938   irstart = Ii[0];
3939 
3940   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3941 
3942   if (PetscDefined(USE_DEBUG)) {
3943     for (i = 0; i < m; i++) {
3944       nnz = Ii[i + 1] - Ii[i];
3945       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3946       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3947       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3948       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3949     }
3950   }
3951 
3952   for (i = 0; i < m; i++) {
3953     nnz     = Ii[i + 1] - Ii[i];
3954     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3955     nnz_max = PetscMax(nnz_max, nnz);
3956     d       = 0;
3957     for (j = 0; j < nnz; j++) {
3958       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3959     }
3960     d_nnz[i] = d;
3961     o_nnz[i] = nnz - d;
3962   }
3963   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3964   PetscCall(PetscFree2(d_nnz, o_nnz));
3965 
3966   for (i = 0; i < m; i++) {
3967     ii = i + rstart;
3968     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3969   }
3970   nooffprocentries    = B->nooffprocentries;
3971   B->nooffprocentries = PETSC_TRUE;
3972   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3973   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3974   B->nooffprocentries = nooffprocentries;
3975 
3976   /* count number of entries below block diagonal */
3977   PetscCall(PetscFree(Aij->ld));
3978   PetscCall(PetscCalloc1(m, &ld));
3979   Aij->ld = ld;
3980   for (i = 0; i < m; i++) {
3981     nnz = Ii[i + 1] - Ii[i];
3982     j   = 0;
3983     while (j < nnz && J[j] < cstart) j++;
3984     ld[i] = j;
3985     if (J) J += nnz;
3986   }
3987 
3988   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3989   PetscFunctionReturn(PETSC_SUCCESS);
3990 }
3991 
3992 /*@
3993   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3994   (the default parallel PETSc format).
3995 
3996   Collective
3997 
3998   Input Parameters:
3999 + B - the matrix
4000 . i - the indices into `j` for the start of each local row (indices start with zero)
4001 . j - the column indices for each local row (indices start with zero)
4002 - v - optional values in the matrix
4003 
4004   Level: developer
4005 
4006   Notes:
4007   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
4008   thus you CANNOT change the matrix entries by changing the values of `v` after you have
4009   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4010 
4011   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4012 
4013   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
4014 
4015   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call, provided the column indices in `j` are sorted.
4016 
4017   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted; if you will use it,
4018   the column indices **must** be sorted.
4019 
4020   The format used for the sparse matrix input is equivalent to a
4021   row-major ordering, i.e., for the following matrix, the input data expected is
4022   as shown
4023 .vb
4024         1 0 0
4025         2 0 3     P0
4026        -------
4027         4 5 6     P1
4028 
4029      Process0 [P0] rows_owned=[0,1]
4030         i =  {0,1,3}  [size = nrow+1  = 2+1]
4031         j =  {0,0,2}  [size = 3]
4032         v =  {1,2,3}  [size = 3]
4033 
4034      Process1 [P1] rows_owned=[2]
4035         i =  {0,3}    [size = nrow+1  = 1+1]
4036         j =  {0,1,2}  [size = 3]
4037         v =  {4,5,6}  [size = 3]
4038 .ve
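
  A minimal calling sketch for process P0 of the example above, assuming the matrix `B` has already been
  created with `MatCreate()`, sized with `MatSetSizes()`, and given type `MATMPIAIJ` (the array names are
  illustrative and error handling is omitted):
.vb
  PetscInt    rowptr[] = {0, 1, 3};        /* local row offsets, starting at 0 */
  PetscInt    cols[]   = {0, 0, 2};        /* global column indices            */
  PetscScalar vals[]   = {1.0, 2.0, 3.0};  /* optional numerical values        */

  PetscCall(MatMPIAIJSetPreallocationCSR(B, rowptr, cols, vals));
.ve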
4039 
4040 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
4041           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4042 @*/
4043 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4044 {
4045   PetscFunctionBegin;
4046   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4047   PetscFunctionReturn(PETSC_SUCCESS);
4048 }
4049 
4050 /*@
4051   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4052   (the default parallel PETSc format).  For good matrix assembly performance
4053   the user should preallocate the matrix storage by setting the parameters
4054   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4055 
4056   Collective
4057 
4058   Input Parameters:
4059 + B     - the matrix
4060 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4061            (same value is used for all local rows)
4062 . d_nnz - array containing the number of nonzeros in the various rows of the
4063            DIAGONAL portion of the local submatrix (possibly different for each row)
4064            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4065            The size of this array is equal to the number of local rows, i.e., 'm'.
4066            For matrices that will be factored, you must leave room for (and set)
4067            the diagonal entry even if it is zero.
4068 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4069            submatrix (same value is used for all local rows).
4070 - o_nnz - array containing the number of nonzeros in the various rows of the
4071            OFF-DIAGONAL portion of the local submatrix (possibly different for
4072            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4073            structure. The size of this array is equal to the number
4074            of local rows, i.e., 'm'.
4075 
4076   Example Usage:
4077   Consider the following 8x8 matrix with 34 non-zero values, that is
4078   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4079   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4080   as follows
4081 
4082 .vb
4083             1  2  0  |  0  3  0  |  0  4
4084     Proc0   0  5  6  |  7  0  0  |  8  0
4085             9  0 10  | 11  0  0  | 12  0
4086     -------------------------------------
4087            13  0 14  | 15 16 17  |  0  0
4088     Proc1   0 18  0  | 19 20 21  |  0  0
4089             0  0  0  | 22 23  0  | 24  0
4090     -------------------------------------
4091     Proc2  25 26 27  |  0  0 28  | 29  0
4092            30  0  0  | 31 32 33  |  0 34
4093 .ve
4094 
4095   This can be represented as a collection of submatrices as
4096 .vb
4097       A B C
4098       D E F
4099       G H I
4100 .ve
4101 
4102   Where the submatrices A,B,C are owned by proc0, D,E,F are
4103   owned by proc1, G,H,I are owned by proc2.
4104 
4105   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4106   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4107   The 'M','N' parameters are 8,8, and have the same values on all procs.
4108 
4109   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4110   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4111   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4112   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4113   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4114   matrix, and [DF] as another `MATSEQAIJ` matrix.
4115 
4116   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4117   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4118   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4119   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4120   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4121   In this case, the values of `d_nz`, `o_nz` are
4122 .vb
4123      proc0  dnz = 2, o_nz = 2
4124      proc1  dnz = 3, o_nz = 2
4125      proc2  dnz = 1, o_nz = 4
4126 .ve
4127   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4128   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4129   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4130   34 values.
4131 
4132   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4133   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4134   In the above case the values for `d_nnz`, `o_nnz` are
4135 .vb
4136      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4137      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4138      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4139 .ve
4140   Here the space allocated is the sum of all the above values, i.e., 34, and
4141   hence pre-allocation is perfect.
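
  As a sketch, the call proc0 of the example above would make for the `d_nnz`/`o_nnz` variant might look
  like the following, assuming `B` has already been created with `MatCreate()`, sized with `MatSetSizes()`,
  and given type `MATMPIAIJ` (array names are illustrative; each rank passes its own arrays):
.vb
  PetscInt d_nnz[] = {2, 2, 2};   /* nonzeros per local row in the DIAGONAL block [A]      */
  PetscInt o_nnz[] = {2, 2, 2};   /* nonzeros per local row in the OFF-DIAGONAL block [BC] */

  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
.ve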
4142 
4143   Level: intermediate
4144 
4145   Notes:
4146   If the *_nnz parameter is given then the *_nz parameter is ignored
4147 
4148   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4149   storage.  The stored row and column indices begin with zero.
4150   See [Sparse Matrices](sec_matsparse) for details.
4151 
4152   The parallel matrix is partitioned such that the first m0 rows belong to
4153   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4154   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4155 
4156   The DIAGONAL portion of the local submatrix of a processor can be defined
4157   as the submatrix which is obtained by extracting the part corresponding to
4158   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4159   first row that belongs to the processor, r2 is the last row belonging to
4160   this processor, and c1-c2 is the range of indices of the local part of a
4161   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4162   common case of a square matrix, the row and column ranges are the same and
4163   the DIAGONAL part is also square. The remaining portion of the local
4164   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4165 
4166   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4167 
4168   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4169   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4170   You can also run with the option `-info` and look for messages with the string
4171   malloc in them to see if additional memory allocation was needed.
4172 
4173 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4174           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4175 @*/
4176 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4177 {
4178   PetscFunctionBegin;
4179   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4180   PetscValidType(B, 1);
4181   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4182   PetscFunctionReturn(PETSC_SUCCESS);
4183 }
4184 
4185 /*@
4186   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4187   CSR format.
4188 
4189   Collective
4190 
4191   Input Parameters:
4192 + comm - MPI communicator
4193 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4194 . n    - This value should be the same as the local size used in creating the
4195          x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
4196          calculated if `N` is given) For square matrices n is almost always `m`.
4197 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4198 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4199 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4200 . j    - global column indices
4201 - a    - optional matrix values
4202 
4203   Output Parameter:
4204 . mat - the matrix
4205 
4206   Level: intermediate
4207 
4208   Notes:
4209   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4210   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4211   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4212 
4213   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4214 
4215   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4216 
4217   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4218   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4219 
4220   The format which is used for the sparse matrix input, is equivalent to a
4221   row-major ordering, i.e., for the following matrix, the input data expected is
4222   as shown
4223 .vb
4224         1 0 0
4225         2 0 3     P0
4226        -------
4227         4 5 6     P1
4228 
4229      Process0 [P0] rows_owned=[0,1]
4230         i =  {0,1,3}  [size = nrow+1  = 2+1]
4231         j =  {0,0,2}  [size = 3]
4232         v =  {1,2,3}  [size = 3]
4233 
4234      Process1 [P1] rows_owned=[2]
4235         i =  {0,3}    [size = nrow+1  = 1+1]
4236         j =  {0,1,2}  [size = 3]
4237         v =  {4,5,6}  [size = 3]
4238 .ve
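
  A minimal sketch of the corresponding call on process P0 of the example above (the array names `ia`, `ja`,
  and `va` are illustrative; each rank passes its own local CSR arrays and local row count):
.vb
  Mat         A;
  PetscInt    ia[] = {0, 1, 3};
  PetscInt    ja[] = {0, 0, 2};
  PetscScalar va[] = {1.0, 2.0, 3.0};

  PetscCall(MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, ia, ja, va, &A));
.ve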
4239 
4240 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4241           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4242 @*/
4243 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4244 {
4245   PetscFunctionBegin;
4246   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4247   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4248   PetscCall(MatCreate(comm, mat));
4249   PetscCall(MatSetSizes(*mat, m, n, M, N));
4250   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4251   PetscCall(MatSetType(*mat, MATMPIAIJ));
4252   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4253   PetscFunctionReturn(PETSC_SUCCESS);
4254 }
4255 
4256 /*@
4257   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4258   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4259   to `MatCreateMPIAIJWithArrays()`
4260 
4261   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4262 
4263   Collective
4264 
4265   Input Parameters:
4266 + mat - the matrix
4267 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4268 . n   - This value should be the same as the local size used in creating the
4269        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4270        calculated if N is given) For square matrices n is almost always m.
4271 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4272 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4273 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4274 . J   - column indices
4275 - v   - matrix values
4276 
4277   Level: deprecated
4278 
4279 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4280           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4281 @*/
4282 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4283 {
4284   PetscInt        nnz, i;
4285   PetscBool       nooffprocentries;
4286   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4287   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4288   PetscScalar    *ad, *ao;
4289   PetscInt        ldi, Iii, md;
4290   const PetscInt *Adi = Ad->i;
4291   PetscInt       *ld  = Aij->ld;
4292 
4293   PetscFunctionBegin;
4294   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4295   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4296   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4297   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4298 
4299   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4300   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4301 
4302   for (i = 0; i < m; i++) {
4303     if (PetscDefined(USE_DEBUG)) {
4304       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4305         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4306         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4307       }
4308     }
4309     nnz = Ii[i + 1] - Ii[i];
4310     Iii = Ii[i];
4311     ldi = ld[i];
4312     md  = Adi[i + 1] - Adi[i];
4313     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4314     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4315     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4316     ad += md;
4317     ao += nnz - md;
4318   }
4319   nooffprocentries      = mat->nooffprocentries;
4320   mat->nooffprocentries = PETSC_TRUE;
4321   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4322   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4323   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4324   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4325   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4326   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4327   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4328   mat->nooffprocentries = nooffprocentries;
4329   PetscFunctionReturn(PETSC_SUCCESS);
4330 }
4331 
4332 /*@
4333   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4334 
4335   Collective
4336 
4337   Input Parameters:
4338 + mat - the matrix
4339 - v   - matrix values, stored by row
4340 
4341   Level: intermediate
4342 
4343   Notes:
4344   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4345 
4346   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
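
  A minimal usage sketch, assuming `mat` was created with `MatCreateMPIAIJWithArrays()` using sorted column
  indices, and `newvals` and `nsteps` are illustrative names for this rank's nonzero values stored by row and
  the number of updates:
.vb
  for (PetscInt step = 0; step < nsteps; step++) {
    /* recompute the entries of newvals, keeping the same sparsity pattern */
    PetscCall(MatUpdateMPIAIJWithArray(mat, newvals));
    /* ... use the updated matrix, for example in a linear solve ... */
  }
.ve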
4347 
4348 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4349           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4350 @*/
4351 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4352 {
4353   PetscInt        nnz, i, m;
4354   PetscBool       nooffprocentries;
4355   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4356   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4357   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4358   PetscScalar    *ad, *ao;
4359   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4360   PetscInt        ldi, Iii, md;
4361   PetscInt       *ld = Aij->ld;
4362 
4363   PetscFunctionBegin;
4364   m = mat->rmap->n;
4365 
4366   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4367   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4368   Iii = 0;
4369   for (i = 0; i < m; i++) {
4370     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4371     ldi = ld[i];
4372     md  = Adi[i + 1] - Adi[i];
4373     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4374     ad += md;
4375     if (ao) {
4376       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4377       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4378       ao += nnz - md;
4379     }
4380     Iii += nnz;
4381   }
4382   nooffprocentries      = mat->nooffprocentries;
4383   mat->nooffprocentries = PETSC_TRUE;
4384   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4385   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4386   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4387   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4388   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4389   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4390   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4391   mat->nooffprocentries = nooffprocentries;
4392   PetscFunctionReturn(PETSC_SUCCESS);
4393 }
4394 
4395 /*@
4396   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4397   (the default parallel PETSc format).  For good matrix assembly performance
4398   the user should preallocate the matrix storage by setting the parameters
4399   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4400 
4401   Collective
4402 
4403   Input Parameters:
4404 + comm  - MPI communicator
4405 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4406           This value should be the same as the local size used in creating the
4407           y vector for the matrix-vector product y = Ax.
4408 . n     - This value should be the same as the local size used in creating the
4409           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4410           calculated if N is given) For square matrices n is almost always m.
4411 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4412 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4413 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4414           (same value is used for all local rows)
4415 . d_nnz - array containing the number of nonzeros in the various rows of the
4416           DIAGONAL portion of the local submatrix (possibly different for each row)
4417           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4418           The size of this array is equal to the number of local rows, i.e., 'm'.
4419 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4420           submatrix (same value is used for all local rows).
4421 - o_nnz - array containing the number of nonzeros in the various rows of the
4422           OFF-DIAGONAL portion of the local submatrix (possibly different for
4423           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4424           structure. The size of this array is equal to the number
4425           of local rows, i.e., 'm'.
4426 
4427   Output Parameter:
4428 . A - the matrix
4429 
4430   Options Database Keys:
4431 + -mat_no_inode                     - Do not use inodes
4432 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4433 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4434                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4435                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4436 
4437   Level: intermediate
4438 
4439   Notes:
4440   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4441   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4442   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4443 
4444   If the *_nnz parameter is given then the *_nz parameter is ignored
4445 
4446   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4447   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4448   storage requirements for this matrix.
4449 
4450   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4451   processor then it must be used on all processors that share the object for
4452   that argument.
4453 
4454   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4455   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4456 
4457   The user MUST specify either the local or global matrix dimensions
4458   (possibly both).
4459 
4460   The parallel matrix is partitioned across processors such that the
4461   first `m0` rows belong to process 0, the next `m1` rows belong to
4462   process 1, the next `m2` rows belong to process 2, etc., where
4463   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4464   values corresponding to an [m x N] submatrix.
4465 
4466   The columns are logically partitioned with the n0 columns belonging
4467   to the 0th partition, the next n1 columns belonging to the next
4468   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4469 
4470   The DIAGONAL portion of the local submatrix on any given processor
4471   is the submatrix corresponding to the rows and columns m,n
4472   owned by that processor, i.e., the diagonal matrix on
4473   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4474   etc. The remaining portion of the local submatrix [m x (N-n)]
4475   constitutes the OFF-DIAGONAL portion. The example below better
4476   illustrates this concept. The two matrices, the DIAGONAL portion and
4477   the OFF-DIAGONAL portion, are each stored as `MATSEQAIJ` matrices.
4478 
4479   For a square global matrix we define each processor's diagonal portion
4480   to be its local rows and the corresponding columns (a square submatrix);
4481   each processor's off-diagonal portion encompasses the remainder of the
4482   local matrix (a rectangular submatrix).
4483 
4484   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4485 
4486   When calling this routine with a single process communicator, a matrix of
4487   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4488   type of communicator, use the construction mechanism
4489 .vb
4490   MatCreate(..., &A);
4491   MatSetType(A, MATMPIAIJ);
4492   MatSetSizes(A, m, n, M, N);
4493   MatMPIAIJSetPreallocation(A, ...);
4494 .ve
4495 
4496   By default, this format uses inodes (identical nodes) when possible.
4497   We search for consecutive rows with the same nonzero structure, thereby
4498   reusing matrix information to achieve increased efficiency.
4499 
4500   Example Usage:
4501   Consider the following 8x8 matrix with 34 non-zero values, that is
4502   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4503   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4504   as follows
4505 
4506 .vb
4507             1  2  0  |  0  3  0  |  0  4
4508     Proc0   0  5  6  |  7  0  0  |  8  0
4509             9  0 10  | 11  0  0  | 12  0
4510     -------------------------------------
4511            13  0 14  | 15 16 17  |  0  0
4512     Proc1   0 18  0  | 19 20 21  |  0  0
4513             0  0  0  | 22 23  0  | 24  0
4514     -------------------------------------
4515     Proc2  25 26 27  |  0  0 28  | 29  0
4516            30  0  0  | 31 32 33  |  0 34
4517 .ve
4518 
4519   This can be represented as a collection of submatrices as
4520 
4521 .vb
4522       A B C
4523       D E F
4524       G H I
4525 .ve
4526 
4527   Where the submatrices A,B,C are owned by proc0, D,E,F are
4528   owned by proc1, G,H,I are owned by proc2.
4529 
4530   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4531   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4532   The 'M','N' parameters are 8,8, and have the same values on all procs.
4533 
4534   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4535   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4536   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4537   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4538   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4539   matrix, and [DF] as another `MATSEQAIJ` matrix.
4540 
4541   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4542   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4543   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4544   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4545   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4546   In this case, the values of `d_nz`,`o_nz` are
4547 .vb
4548      proc0  dnz = 2, o_nz = 2
4549      proc1  dnz = 3, o_nz = 2
4550      proc2  dnz = 1, o_nz = 4
4551 .ve
4552   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4553   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4554   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4555   34 values.
4556 
4557   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4558   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4559   In the above case the values for `d_nnz`, `o_nnz` are
4560 .vb
4561      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4562      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4563      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4564 .ve
4565   Here the space allocated is the sum of all the above values, i.e., 34, and
4566   hence pre-allocation is perfect.
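
  As a sketch, proc0 of the 8x8 example could create the matrix with a single call using the
  `d_nnz`/`o_nnz` values above (array names are illustrative; each rank passes its own local sizes
  and arrays, and the `MatSetValues()`/assembly calls are elided):
.vb
  Mat      A;
  PetscInt d_nnz[] = {2, 2, 2};
  PetscInt o_nnz[] = {2, 2, 2};

  PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A));
  /* ... set entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve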
4567 
4568 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4569           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4570           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4571 @*/
4572 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4573 {
4574   PetscMPIInt size;
4575 
4576   PetscFunctionBegin;
4577   PetscCall(MatCreate(comm, A));
4578   PetscCall(MatSetSizes(*A, m, n, M, N));
4579   PetscCallMPI(MPI_Comm_size(comm, &size));
4580   if (size > 1) {
4581     PetscCall(MatSetType(*A, MATMPIAIJ));
4582     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4583   } else {
4584     PetscCall(MatSetType(*A, MATSEQAIJ));
4585     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4586   }
4587   PetscFunctionReturn(PETSC_SUCCESS);
4588 }
4589 
4590 /*MC
4591     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4592 
4593     Synopsis:
4594     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4595 
4596     Not Collective
4597 
4598     Input Parameter:
4599 .   A - the `MATMPIAIJ` matrix
4600 
4601     Output Parameters:
4602 +   Ad - the diagonal portion of the matrix
4603 .   Ao - the off-diagonal portion of the matrix
4604 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4605 -   ierr - error code
4606 
4607      Level: advanced
4608 
4609     Note:
4610     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4611 
4612 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4613 M*/
4614 
4615 /*MC
4616     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4617 
4618     Synopsis:
4619     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4620 
4621     Not Collective
4622 
4623     Input Parameters:
4624 +   A - the `MATMPIAIJ` matrix
4625 .   Ad - the diagonal portion of the matrix
4626 .   Ao - the off-diagonal portion of the matrix
4627 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4628 -   ierr - error code
4629 
4630      Level: advanced
4631 
4632 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4633 M*/
4634 
4635 /*@C
4636   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4637 
4638   Not Collective
4639 
4640   Input Parameter:
4641 . A - The `MATMPIAIJ` matrix
4642 
4643   Output Parameters:
4644 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4645 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4646 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4647 
4648   Level: intermediate
4649 
4650   Note:
4651   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4652   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4653   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4654   local column numbers to global column numbers in the original matrix.
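
  A small sketch of recovering the global column index of an entry of `Ao` using `colmap`
  (variable names are illustrative):
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;

  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap));
  /* the k-th local column of Ao corresponds to global column colmap[k] of A */
.ve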
4655 
4656   Fortran Notes:
4657   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4658 
4659 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4660 @*/
4661 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4662 {
4663   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4664   PetscBool   flg;
4665 
4666   PetscFunctionBegin;
4667   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4668   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4669   if (Ad) *Ad = a->A;
4670   if (Ao) *Ao = a->B;
4671   if (colmap) *colmap = a->garray;
4672   PetscFunctionReturn(PETSC_SUCCESS);
4673 }
4674 
4675 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4676 {
4677   PetscInt     m, N, i, rstart, nnz, Ii;
4678   PetscInt    *indx;
4679   PetscScalar *values;
4680   MatType      rootType;
4681 
4682   PetscFunctionBegin;
4683   PetscCall(MatGetSize(inmat, &m, &N));
4684   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4685     PetscInt *dnz, *onz, sum, bs, cbs;
4686 
4687     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4688     /* Check sum(n) = N */
4689     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4690     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4691 
4692     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4693     rstart -= m;
4694 
4695     MatPreallocateBegin(comm, m, n, dnz, onz);
4696     for (i = 0; i < m; i++) {
4697       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4698       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4699       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4700     }
4701 
4702     PetscCall(MatCreate(comm, outmat));
4703     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4704     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4705     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4706     PetscCall(MatGetRootType_Private(inmat, &rootType));
4707     PetscCall(MatSetType(*outmat, rootType));
4708     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4709     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4710     MatPreallocateEnd(dnz, onz);
4711     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4712   }
4713 
4714   /* numeric phase */
4715   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4716   for (i = 0; i < m; i++) {
4717     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4718     Ii = i + rstart;
4719     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4720     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4721   }
4722   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4723   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4724   PetscFunctionReturn(PETSC_SUCCESS);
4725 }
4726 
4727 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4728 {
4729   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4730 
4731   PetscFunctionBegin;
4732   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4733   PetscCall(PetscFree(merge->id_r));
4734   PetscCall(PetscFree(merge->len_s));
4735   PetscCall(PetscFree(merge->len_r));
4736   PetscCall(PetscFree(merge->bi));
4737   PetscCall(PetscFree(merge->bj));
4738   PetscCall(PetscFree(merge->buf_ri[0]));
4739   PetscCall(PetscFree(merge->buf_ri));
4740   PetscCall(PetscFree(merge->buf_rj[0]));
4741   PetscCall(PetscFree(merge->buf_rj));
4742   PetscCall(PetscFree(merge->coi));
4743   PetscCall(PetscFree(merge->coj));
4744   PetscCall(PetscFree(merge->owners_co));
4745   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4746   PetscCall(PetscFree(merge));
4747   PetscFunctionReturn(PETSC_SUCCESS);
4748 }
4749 
4750 #include <../src/mat/utils/freespace.h>
4751 #include <petscbt.h>
4752 
4753 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4754 {
4755   MPI_Comm             comm;
4756   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4757   PetscMPIInt          size, rank, taga, *len_s;
4758   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4759   PetscMPIInt          proc, k;
4760   PetscInt           **buf_ri, **buf_rj;
4761   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4762   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4763   MPI_Request         *s_waits, *r_waits;
4764   MPI_Status          *status;
4765   const MatScalar     *aa, *a_a;
4766   MatScalar          **abuf_r, *ba_i;
4767   Mat_Merge_SeqsToMPI *merge;
4768   PetscContainer       container;
4769 
4770   PetscFunctionBegin;
4771   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4772   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4773 
4774   PetscCallMPI(MPI_Comm_size(comm, &size));
4775   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4776 
4777   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4778   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4779   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4780   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4781   aa = a_a;
4782 
4783   bi     = merge->bi;
4784   bj     = merge->bj;
4785   buf_ri = merge->buf_ri;
4786   buf_rj = merge->buf_rj;
4787 
4788   PetscCall(PetscMalloc1(size, &status));
4789   owners = merge->rowmap->range;
4790   len_s  = merge->len_s;
4791 
4792   /* send and recv matrix values */
4793   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4794   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4795 
4796   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4797   for (proc = 0, k = 0; proc < size; proc++) {
4798     if (!len_s[proc]) continue;
4799     i = owners[proc];
4800     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4801     k++;
4802   }
4803 
4804   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4805   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4806   PetscCall(PetscFree(status));
4807 
4808   PetscCall(PetscFree(s_waits));
4809   PetscCall(PetscFree(r_waits));
4810 
4811   /* insert mat values of mpimat */
4812   PetscCall(PetscMalloc1(N, &ba_i));
4813   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4814 
4815   for (k = 0; k < merge->nrecv; k++) {
4816     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4817     nrows       = *buf_ri_k[k];
4818     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4819     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4820   }
4821 
4822   /* set values of ba */
4823   m = merge->rowmap->n;
4824   for (i = 0; i < m; i++) {
4825     arow = owners[rank] + i;
4826     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4827     bnzi = bi[i + 1] - bi[i];
4828     PetscCall(PetscArrayzero(ba_i, bnzi));
4829 
4830     /* add local non-zero vals of this proc's seqmat into ba */
4831     anzi   = ai[arow + 1] - ai[arow];
4832     aj     = a->j + ai[arow];
4833     aa     = a_a + ai[arow];
4834     nextaj = 0;
4835     for (j = 0; nextaj < anzi; j++) {
4836       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4837         ba_i[j] += aa[nextaj++];
4838       }
4839     }
4840 
4841     /* add received vals into ba */
4842     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4843       /* i-th row */
4844       if (i == *nextrow[k]) {
4845         anzi   = *(nextai[k] + 1) - *nextai[k];
4846         aj     = buf_rj[k] + *nextai[k];
4847         aa     = abuf_r[k] + *nextai[k];
4848         nextaj = 0;
4849         for (j = 0; nextaj < anzi; j++) {
4850           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4851             ba_i[j] += aa[nextaj++];
4852           }
4853         }
4854         nextrow[k]++;
4855         nextai[k]++;
4856       }
4857     }
4858     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4859   }
4860   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4861   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4862   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4863 
4864   PetscCall(PetscFree(abuf_r[0]));
4865   PetscCall(PetscFree(abuf_r));
4866   PetscCall(PetscFree(ba_i));
4867   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4868   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4869   PetscFunctionReturn(PETSC_SUCCESS);
4870 }
4871 
4872 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4873 {
4874   Mat                  B_mpi;
4875   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4876   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4877   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4878   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4879   PetscInt             len, *dnz, *onz, bs, cbs;
4880   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4881   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4882   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4883   MPI_Status          *status;
4884   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4885   PetscBT              lnkbt;
4886   Mat_Merge_SeqsToMPI *merge;
4887   PetscContainer       container;
4888 
4889   PetscFunctionBegin;
4890   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4891 
4892   /* make sure it is a PETSc comm */
4893   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4894   PetscCallMPI(MPI_Comm_size(comm, &size));
4895   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4896 
4897   PetscCall(PetscNew(&merge));
4898   PetscCall(PetscMalloc1(size, &status));
4899 
4900   /* determine row ownership */
4901   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4902   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4903   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4904   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4905   PetscCall(PetscLayoutSetUp(merge->rowmap));
4906   PetscCall(PetscMalloc1(size, &len_si));
4907   PetscCall(PetscMalloc1(size, &merge->len_s));
4908 
4909   m      = merge->rowmap->n;
4910   owners = merge->rowmap->range;
4911 
4912   /* determine the number of messages to send, their lengths */
4913   len_s = merge->len_s;
4914 
4915   len          = 0; /* length of buf_si[] */
4916   merge->nsend = 0;
4917   for (PetscMPIInt proc = 0; proc < size; proc++) {
4918     len_si[proc] = 0;
4919     if (proc == rank) {
4920       len_s[proc] = 0;
4921     } else {
4922       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4923       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4924     }
4925     if (len_s[proc]) {
4926       merge->nsend++;
4927       nrows = 0;
4928       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4929         if (ai[i + 1] > ai[i]) nrows++;
4930       }
4931       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4932       len += len_si[proc];
4933     }
4934   }
4935 
4936   /* determine the number and length of messages to receive for ij-structure */
4937   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4938   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4939 
4940   /* post the Irecv of j-structure */
4941   PetscCall(PetscCommGetNewTag(comm, &tagj));
4942   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4943 
4944   /* post the Isend of j-structure */
4945   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4946 
4947   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4948     if (!len_s[proc]) continue;
4949     i = owners[proc];
4950     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4951     k++;
4952   }
4953 
4954   /* receives and sends of j-structure are complete */
4955   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4956   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4957 
4958   /* send and recv i-structure */
4959   PetscCall(PetscCommGetNewTag(comm, &tagi));
4960   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4961 
4962   PetscCall(PetscMalloc1(len + 1, &buf_s));
4963   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4964   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4965     if (!len_s[proc]) continue;
4966     /* form outgoing message for i-structure:
4967          buf_si[0]:                 nrows to be sent
4968                [1:nrows]:           row index (global)
4969                [nrows+1:2*nrows+1]: i-structure index
4970     */
4971     nrows       = len_si[proc] / 2 - 1;
4972     buf_si_i    = buf_si + nrows + 1;
4973     buf_si[0]   = nrows;
4974     buf_si_i[0] = 0;
4975     nrows       = 0;
4976     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4977       anzi = ai[i + 1] - ai[i];
4978       if (anzi) {
4979         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4980         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4981         nrows++;
4982       }
4983     }
4984     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4985     k++;
4986     buf_si += len_si[proc];
4987   }
4988 
4989   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4990   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4991 
4992   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4993   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4994 
4995   PetscCall(PetscFree(len_si));
4996   PetscCall(PetscFree(len_ri));
4997   PetscCall(PetscFree(rj_waits));
4998   PetscCall(PetscFree2(si_waits, sj_waits));
4999   PetscCall(PetscFree(ri_waits));
5000   PetscCall(PetscFree(buf_s));
5001   PetscCall(PetscFree(status));
5002 
5003   /* compute a local seq matrix in each processor */
5004   /* allocate bi array and free space for accumulating nonzero column info */
5005   PetscCall(PetscMalloc1(m + 1, &bi));
5006   bi[0] = 0;
5007 
5008   /* create and initialize a linked list */
5009   nlnk = N + 1;
5010   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
5011 
5012   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5013   len = ai[owners[rank + 1]] - ai[owners[rank]];
5014   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5015 
5016   current_space = free_space;
5017 
5018   /* determine symbolic info for each local row */
5019   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5020 
5021   for (k = 0; k < merge->nrecv; k++) {
5022     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5023     nrows       = *buf_ri_k[k];
5024     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5025     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5026   }
5027 
5028   MatPreallocateBegin(comm, m, n, dnz, onz);
5029   len = 0;
5030   for (i = 0; i < m; i++) {
5031     bnzi = 0;
5032     /* add local non-zero cols of this proc's seqmat into lnk */
5033     arow = owners[rank] + i;
5034     anzi = ai[arow + 1] - ai[arow];
5035     aj   = a->j + ai[arow];
5036     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5037     bnzi += nlnk;
5038     /* add received col data into lnk */
5039     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5040       if (i == *nextrow[k]) {            /* i-th row */
5041         anzi = *(nextai[k] + 1) - *nextai[k];
5042         aj   = buf_rj[k] + *nextai[k];
5043         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5044         bnzi += nlnk;
5045         nextrow[k]++;
5046         nextai[k]++;
5047       }
5048     }
5049     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5050 
5051     /* if free space is not available, make more free space */
5052     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5053     /* copy data into free space, then initialize lnk */
5054     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5055     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5056 
5057     current_space->array += bnzi;
5058     current_space->local_used += bnzi;
5059     current_space->local_remaining -= bnzi;
5060 
5061     bi[i + 1] = bi[i] + bnzi;
5062   }
5063 
5064   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5065 
5066   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5067   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5068   PetscCall(PetscLLDestroy(lnk, lnkbt));
5069 
5070   /* create symbolic parallel matrix B_mpi */
5071   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5072   PetscCall(MatCreate(comm, &B_mpi));
5073   if (n == PETSC_DECIDE) {
5074     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5075   } else {
5076     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5077   }
5078   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5079   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5080   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5081   MatPreallocateEnd(dnz, onz);
5082   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5083 
5084   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5085   B_mpi->assembled = PETSC_FALSE;
5086   merge->bi        = bi;
5087   merge->bj        = bj;
5088   merge->buf_ri    = buf_ri;
5089   merge->buf_rj    = buf_rj;
5090   merge->coi       = NULL;
5091   merge->coj       = NULL;
5092   merge->owners_co = NULL;
5093 
5094   PetscCall(PetscCommDestroy(&comm));
5095 
5096   /* attach the supporting struct to B_mpi for reuse */
5097   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5098   PetscCall(PetscContainerSetPointer(container, merge));
5099   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5100   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5101   PetscCall(PetscContainerDestroy(&container));
5102   *mpimat = B_mpi;
5103 
5104   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5105   PetscFunctionReturn(PETSC_SUCCESS);
5106 }
5107 
5108 /*@
5109   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5110   matrices from each processor
5111 
5112   Collective
5113 
5114   Input Parameters:
5115 + comm   - the communicator the parallel matrix will live on
5116 . seqmat - the input sequential matrix on each MPI process
5117 . m      - number of local rows (or `PETSC_DECIDE`)
5118 . n      - number of local columns (or `PETSC_DECIDE`)
5119 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5120 
5121   Output Parameter:
5122 . mpimat - the parallel matrix generated
5123 
5124   Level: advanced
5125 
5126   Note:
5127   The dimensions of the sequential matrix on each process MUST be the same.
5128   The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI", and will be
5129   destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5130 
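  Example Usage:
  A minimal sketch, assuming each MPI process holds an already assembled `MATSEQAIJ` named seqmat of identical dimensions:
.vb
  Mat mpimat;

  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpimat));
  /* ... later, after only the numerical values of seqmat have changed ... */
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &mpimat));
  PetscCall(MatDestroy(&mpimat));
.ve
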
5131 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5132 @*/
5133 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5134 {
5135   PetscMPIInt size;
5136 
5137   PetscFunctionBegin;
5138   PetscCallMPI(MPI_Comm_size(comm, &size));
5139   if (size == 1) {
5140     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5141     if (scall == MAT_INITIAL_MATRIX) {
5142       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5143     } else {
5144       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5145     }
5146     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5147     PetscFunctionReturn(PETSC_SUCCESS);
5148   }
5149   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5150   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5151   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5152   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5153   PetscFunctionReturn(PETSC_SUCCESS);
5154 }
5155 
5156 /*@
5157   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5158 
5159   Not Collective
5160 
5161   Input Parameter:
5162 . A - the matrix
5163 
5164   Output Parameter:
5165 . A_loc - the local sequential matrix generated
5166 
5167   Level: developer
5168 
5169   Notes:
5170   The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
5171   with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with `MatGetLocalSize()`
5172   and `n` is the global column count obtained with `MatGetSize()`.
5173 
5174   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5175 
5176   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5177 
5178   Destroy the matrix with `MatDestroy()`
5179 
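  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATAIJ` matrix:
.vb
  Mat A_loc;

  PetscCall(MatAIJGetLocalMat(A, &A_loc));
  /* ... use A_loc as a sequential matrix ... */
  PetscCall(MatDestroy(&A_loc));
.ve
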
5180 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5181 @*/
5182 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5183 {
5184   PetscBool mpi;
5185 
5186   PetscFunctionBegin;
5187   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5188   if (mpi) {
5189     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5190   } else {
5191     *A_loc = A;
5192     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5193   }
5194   PetscFunctionReturn(PETSC_SUCCESS);
5195 }
5196 
5197 /*@
5198   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5199 
5200   Not Collective
5201 
5202   Input Parameters:
5203 + A     - the matrix
5204 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5205 
5206   Output Parameter:
5207 . A_loc - the local sequential matrix generated
5208 
5209   Level: developer
5210 
5211   Notes:
5212   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5213   matrix with `mlocal` rows and `n` columns. `mlocal` is the local row count obtained with
5214   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5215 
5216   In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5217 
5218   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5219   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5220   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5221   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5222 
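  Example Usage:
  A minimal sketch, assuming `A` is an assembled parallel `MATMPIAIJ` matrix:
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
  /* ... change the values of A (same nonzero pattern), then refresh the local copy ... */
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve
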
5223 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5224 @*/
5225 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5226 {
5227   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5228   Mat_SeqAIJ        *mat, *a, *b;
5229   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5230   const PetscScalar *aa, *ba, *aav, *bav;
5231   PetscScalar       *ca, *cam;
5232   PetscMPIInt        size;
5233   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5234   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5235   PetscBool          match;
5236 
5237   PetscFunctionBegin;
5238   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5239   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5240   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5241   if (size == 1) {
5242     if (scall == MAT_INITIAL_MATRIX) {
5243       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5244       *A_loc = mpimat->A;
5245     } else if (scall == MAT_REUSE_MATRIX) {
5246       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5247     }
5248     PetscFunctionReturn(PETSC_SUCCESS);
5249   }
5250 
5251   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5252   a  = (Mat_SeqAIJ *)mpimat->A->data;
5253   b  = (Mat_SeqAIJ *)mpimat->B->data;
5254   ai = a->i;
5255   aj = a->j;
5256   bi = b->i;
5257   bj = b->j;
5258   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5259   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5260   aa = aav;
5261   ba = bav;
5262   if (scall == MAT_INITIAL_MATRIX) {
5263     PetscCall(PetscMalloc1(1 + am, &ci));
5264     ci[0] = 0;
5265     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5266     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5267     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5268     k = 0;
5269     for (i = 0; i < am; i++) {
5270       ncols_o = bi[i + 1] - bi[i];
5271       ncols_d = ai[i + 1] - ai[i];
5272       /* off-diagonal portion of A: columns to the left of the diagonal block */
5273       for (jo = 0; jo < ncols_o; jo++) {
5274         col = cmap[*bj];
5275         if (col >= cstart) break;
5276         cj[k] = col;
5277         bj++;
5278         ca[k++] = *ba++;
5279       }
5280       /* diagonal portion of A */
5281       for (j = 0; j < ncols_d; j++) {
5282         cj[k]   = cstart + *aj++;
5283         ca[k++] = *aa++;
5284       }
5285       /* remaining off-diagonal portion of A: columns to the right of the diagonal block */
5286       for (j = jo; j < ncols_o; j++) {
5287         cj[k]   = cmap[*bj++];
5288         ca[k++] = *ba++;
5289       }
5290     }
5291     /* put together the new matrix */
5292     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5293     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5294     /* Since these are PETSc arrays, change flags to free them as necessary. */
5295     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5296     mat->free_a  = PETSC_TRUE;
5297     mat->free_ij = PETSC_TRUE;
5298     mat->nonew   = 0;
5299   } else if (scall == MAT_REUSE_MATRIX) {
5300     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5301     ci  = mat->i;
5302     cj  = mat->j;
5303     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5304     for (i = 0; i < am; i++) {
5305       /* off-diagonal portion of A: columns to the left of the diagonal block */
5306       ncols_o = bi[i + 1] - bi[i];
5307       for (jo = 0; jo < ncols_o; jo++) {
5308         col = cmap[*bj];
5309         if (col >= cstart) break;
5310         *cam++ = *ba++;
5311         bj++;
5312       }
5313       /* diagonal portion of A */
5314       ncols_d = ai[i + 1] - ai[i];
5315       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5316       /* remaining off-diagonal portion of A: columns to the right of the diagonal block */
5317       for (j = jo; j < ncols_o; j++) {
5318         *cam++ = *ba++;
5319         bj++;
5320       }
5321     }
5322     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5323   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5324   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5325   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5326   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5327   PetscFunctionReturn(PETSC_SUCCESS);
5328 }
5329 
5330 /*@
5331   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5332   `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5333 
5334   Not Collective
5335 
5336   Input Parameters:
5337 + A     - the matrix
5338 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5339 
5340   Output Parameters:
5341 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5342 - A_loc - the local sequential matrix generated
5343 
5344   Level: developer
5345 
5346   Note:
5347   This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5348   part, followed by those associated with the off-diagonal part (in its local ordering).
5349 
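  Example Usage:
  A minimal sketch, assuming `A` is an assembled parallel `MATMPIAIJ` matrix:
.vb
  IS  glob;
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
  /* column j of A_loc corresponds to global column glob[j] of A */
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&A_loc));
.ve
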
5350 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5351 @*/
5352 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5353 {
5354   Mat             Ao, Ad;
5355   const PetscInt *cmap;
5356   PetscMPIInt     size;
5357   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5358 
5359   PetscFunctionBegin;
5360   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5361   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5362   if (size == 1) {
5363     if (scall == MAT_INITIAL_MATRIX) {
5364       PetscCall(PetscObjectReference((PetscObject)Ad));
5365       *A_loc = Ad;
5366     } else if (scall == MAT_REUSE_MATRIX) {
5367       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5368     }
5369     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5370     PetscFunctionReturn(PETSC_SUCCESS);
5371   }
5372   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5373   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5374   if (f) {
5375     PetscCall((*f)(A, scall, glob, A_loc));
5376   } else {
5377     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5378     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5379     Mat_SeqAIJ        *c;
5380     PetscInt          *ai = a->i, *aj = a->j;
5381     PetscInt          *bi = b->i, *bj = b->j;
5382     PetscInt          *ci, *cj;
5383     const PetscScalar *aa, *ba;
5384     PetscScalar       *ca;
5385     PetscInt           i, j, am, dn, on;
5386 
5387     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5388     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5389     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5390     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5391     if (scall == MAT_INITIAL_MATRIX) {
5392       PetscInt k;
5393       PetscCall(PetscMalloc1(1 + am, &ci));
5394       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5395       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5396       ci[0] = 0;
5397       for (i = 0, k = 0; i < am; i++) {
5398         const PetscInt ncols_o = bi[i + 1] - bi[i];
5399         const PetscInt ncols_d = ai[i + 1] - ai[i];
5400         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5401         /* diagonal portion of A */
5402         for (j = 0; j < ncols_d; j++, k++) {
5403           cj[k] = *aj++;
5404           ca[k] = *aa++;
5405         }
5406         /* off-diagonal portion of A */
5407         for (j = 0; j < ncols_o; j++, k++) {
5408           cj[k] = dn + *bj++;
5409           ca[k] = *ba++;
5410         }
5411       }
5412       /* put together the new matrix */
5413       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5414       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5415       /* Since these are PETSc arrays, change flags to free them as necessary. */
5416       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5417       c->free_a  = PETSC_TRUE;
5418       c->free_ij = PETSC_TRUE;
5419       c->nonew   = 0;
5420       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5421     } else if (scall == MAT_REUSE_MATRIX) {
5422       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5423       for (i = 0; i < am; i++) {
5424         const PetscInt ncols_d = ai[i + 1] - ai[i];
5425         const PetscInt ncols_o = bi[i + 1] - bi[i];
5426         /* diagonal portion of A */
5427         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5428         /* off-diagonal portion of A */
5429         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5430       }
5431       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5432     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5433     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5434     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5435     if (glob) {
5436       PetscInt cst, *gidx;
5437 
5438       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5439       PetscCall(PetscMalloc1(dn + on, &gidx));
5440       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5441       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5442       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5443     }
5444   }
5445   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5446   PetscFunctionReturn(PETSC_SUCCESS);
5447 }
5448 
5449 /*@C
5450   MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5451 
5452   Not Collective
5453 
5454   Input Parameters:
5455 + A     - the matrix
5456 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5457 . row   - index set of rows to extract (or `NULL`)
5458 - col   - index set of columns to extract (or `NULL`)
5459 
5460   Output Parameter:
5461 . A_loc - the local sequential matrix generated
5462 
5463   Level: developer
5464 
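  Example Usage:
  A minimal sketch, assuming `A` is an assembled parallel `MATMPIAIJ` matrix; passing `NULL` for `row` and `col` selects all local rows and the nonzero columns:
.vb
  Mat A_loc;

  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
  /* ... use A_loc ... */
  PetscCall(MatDestroy(&A_loc));
.ve
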
5465 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5466 @*/
5467 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5468 {
5469   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5470   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5471   IS          isrowa, iscola;
5472   Mat        *aloc;
5473   PetscBool   match;
5474 
5475   PetscFunctionBegin;
5476   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5477   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5478   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5479   if (!row) {
5480     start = A->rmap->rstart;
5481     end   = A->rmap->rend;
5482     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5483   } else {
5484     isrowa = *row;
5485   }
5486   if (!col) {
5487     start = A->cmap->rstart;
5488     cmap  = a->garray;
5489     nzA   = a->A->cmap->n;
5490     nzB   = a->B->cmap->n;
5491     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5492     ncols = 0;
5493     for (i = 0; i < nzB; i++) {
5494       if (cmap[i] < start) idx[ncols++] = cmap[i];
5495       else break;
5496     }
5497     imark = i;
5498     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5499     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5500     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5501   } else {
5502     iscola = *col;
5503   }
5504   if (scall != MAT_INITIAL_MATRIX) {
5505     PetscCall(PetscMalloc1(1, &aloc));
5506     aloc[0] = *A_loc;
5507   }
5508   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5509   if (!col) { /* attach global id of condensed columns */
5510     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5511   }
5512   *A_loc = aloc[0];
5513   PetscCall(PetscFree(aloc));
5514   if (!row) PetscCall(ISDestroy(&isrowa));
5515   if (!col) PetscCall(ISDestroy(&iscola));
5516   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5517   PetscFunctionReturn(PETSC_SUCCESS);
5518 }
5519 
5520 /*
5521  * Create a sequential AIJ matrix based on row indices; all columns of a row are extracted once the row is matched.
5522  * Rows may be local or remote. The routine is designed to be memory scalable, so that nothing is allocated
5523  * based on a global size.
5524  * */
5525 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5526 {
5527   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5528   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5529   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5530   PetscMPIInt            owner;
5531   PetscSFNode           *iremote, *oiremote;
5532   const PetscInt        *lrowindices;
5533   PetscSF                sf, osf;
5534   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5535   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5536   MPI_Comm               comm;
5537   ISLocalToGlobalMapping mapping;
5538   const PetscScalar     *pd_a, *po_a;
5539 
5540   PetscFunctionBegin;
5541   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5542   /* plocalsize is the number of roots
5543    * nrows is the number of leaves
5544    * */
5545   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5546   PetscCall(ISGetLocalSize(rows, &nrows));
5547   PetscCall(PetscCalloc1(nrows, &iremote));
5548   PetscCall(ISGetIndices(rows, &lrowindices));
5549   for (i = 0; i < nrows; i++) {
5550     /* Find a remote index and an owner for a row
5551      * The row could be local or remote
5552      * */
5553     owner = 0;
5554     lidx  = 0;
5555     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5556     iremote[i].index = lidx;
5557     iremote[i].rank  = owner;
5558   }
5559   /* Create an SF to communicate how many nonzero columns each row has */
5560   PetscCall(PetscSFCreate(comm, &sf));
5561   /* SF will figure out the number of nonzero columns for each row, and their
5562    * offsets
5563    * */
5564   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5565   PetscCall(PetscSFSetFromOptions(sf));
5566   PetscCall(PetscSFSetUp(sf));
5567 
5568   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5569   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5570   PetscCall(PetscCalloc1(nrows, &pnnz));
5571   roffsets[0] = 0;
5572   roffsets[1] = 0;
5573   for (i = 0; i < plocalsize; i++) {
5574     /* diagonal */
5575     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5576     /* off-diagonal */
5577     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5578     /* compute offsets so that we know the relative location of each row's data */
5579     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5580     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5581   }
5582   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5583   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5584   /* 'r' means root, and 'l' means leaf */
5585   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5586   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5587   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5588   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5589   PetscCall(PetscSFDestroy(&sf));
5590   PetscCall(PetscFree(roffsets));
5591   PetscCall(PetscFree(nrcols));
5592   dntotalcols = 0;
5593   ontotalcols = 0;
5594   ncol        = 0;
5595   for (i = 0; i < nrows; i++) {
5596     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5597     ncol    = PetscMax(pnnz[i], ncol);
5598     /* diagonal */
5599     dntotalcols += nlcols[i * 2 + 0];
5600     /* off-diagonal */
5601     ontotalcols += nlcols[i * 2 + 1];
5602   }
5603   /* We do not need to figure out the exact number of columns
5604    * since all the calculations will be done by going through the raw data
5605    * */
5606   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5607   PetscCall(MatSetUp(*P_oth));
5608   PetscCall(PetscFree(pnnz));
5609   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5610   /* diagonal */
5611   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5612   /* off-diagonal */
5613   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5614   /* diagonal */
5615   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5616   /* off-diagonal */
5617   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5618   dntotalcols = 0;
5619   ontotalcols = 0;
5620   ntotalcols  = 0;
5621   for (i = 0; i < nrows; i++) {
5622     owner = 0;
5623     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5624     /* Set iremote for diag matrix */
5625     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5626       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5627       iremote[dntotalcols].rank  = owner;
5628       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5629       ilocal[dntotalcols++] = ntotalcols++;
5630     }
5631     /* off-diagonal */
5632     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5633       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5634       oiremote[ontotalcols].rank  = owner;
5635       oilocal[ontotalcols++]      = ntotalcols++;
5636     }
5637   }
5638   PetscCall(ISRestoreIndices(rows, &lrowindices));
5639   PetscCall(PetscFree(loffsets));
5640   PetscCall(PetscFree(nlcols));
5641   PetscCall(PetscSFCreate(comm, &sf));
5642   /* P serves as roots and P_oth is leaves
5643    * Diag matrix
5644    * */
5645   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5646   PetscCall(PetscSFSetFromOptions(sf));
5647   PetscCall(PetscSFSetUp(sf));
5648 
5649   PetscCall(PetscSFCreate(comm, &osf));
5650   /* off-diagonal */
5651   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5652   PetscCall(PetscSFSetFromOptions(osf));
5653   PetscCall(PetscSFSetUp(osf));
5654   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5655   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5656   /* operate on the matrix internal data to save memory */
5657   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5658   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5659   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5660   /* Convert to global indices for diag matrix */
5661   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5662   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5663   /* We want P_oth to store global indices */
5664   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5665   /* Use memory scalable approach */
5666   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5667   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5668   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5669   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5670   /* Convert back to local indices */
5671   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5672   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5673   nout = 0;
5674   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5675   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal nout %" PetscInt_FMT, po->i[plocalsize], nout);
5676   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5677   /* Exchange values */
5678   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5679   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5680   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5681   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5682   /* Stop PETSc from shrinking memory */
5683   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5684   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5685   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5686   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5687   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5688   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5689   PetscCall(PetscSFDestroy(&sf));
5690   PetscCall(PetscSFDestroy(&osf));
5691   PetscFunctionReturn(PETSC_SUCCESS);
5692 }
5693 
5694 /*
5695  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A.
5696  * This supports MPIAIJ and MAIJ.
5697  * */
5698 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5699 {
5700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5701   Mat_SeqAIJ *p_oth;
5702   IS          rows, map;
5703   PetscHMapI  hamp;
5704   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5705   MPI_Comm    comm;
5706   PetscSF     sf, osf;
5707   PetscBool   has;
5708 
5709   PetscFunctionBegin;
5710   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5711   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5712   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5713    *  and then create a submatrix (that often is an overlapping matrix)
5714    * */
5715   if (reuse == MAT_INITIAL_MATRIX) {
5716     /* Use a hash table to figure out unique keys */
5717     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5718     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5719     count = 0;
5720     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5721     for (i = 0; i < a->B->cmap->n; i++) {
5722       key = a->garray[i] / dof;
5723       PetscCall(PetscHMapIHas(hamp, key, &has));
5724       if (!has) {
5725         mapping[i] = count;
5726         PetscCall(PetscHMapISet(hamp, key, count++));
5727       } else {
5728         /* Current 'i' maps to the same key as the previous one */
5729         mapping[i] = count - 1;
5730       }
5731     }
5732     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5733     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5734     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5735     PetscCall(PetscCalloc1(htsize, &rowindices));
5736     off = 0;
5737     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5738     PetscCall(PetscHMapIDestroy(&hamp));
5739     PetscCall(PetscSortInt(htsize, rowindices));
5740     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5741     /* In case the matrix was already created but the user wants to recreate it */
5742     PetscCall(MatDestroy(P_oth));
5743     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5744     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5745     PetscCall(ISDestroy(&map));
5746     PetscCall(ISDestroy(&rows));
5747   } else if (reuse == MAT_REUSE_MATRIX) {
5748     /* If the matrix was already created, we simply update the values using the SF objects
5749      * that were attached to the matrix earlier.
5750      */
5751     const PetscScalar *pd_a, *po_a;
5752 
5753     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5754     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5755     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5756     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5757     /* Update values in place */
5758     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5759     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5760     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5761     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5762     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5763     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5764     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5765     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5766   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5767   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5768   PetscFunctionReturn(PETSC_SUCCESS);
5769 }
5770 
5771 /*@C
5772   MatGetBrowsOfAcols - Returns an `IS` containing the rows of `B` that correspond to the nonzero columns of local `A`
5773 
5774   Collective
5775 
5776   Input Parameters:
5777 + A     - the first matrix in `MATMPIAIJ` format
5778 . B     - the second matrix in `MATMPIAIJ` format
5779 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5780 
5781   Output Parameters:
5782 + rowb  - on input, the index set of rows of B to extract (or `NULL`); modified on output
5783 . colb  - on input, the index set of columns of B to extract (or `NULL`); modified on output
5784 - B_seq - the sequential matrix generated
5785 
5786   Level: developer
5787 
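  Example Usage:
  A minimal sketch, assuming `A` and `B` are assembled `MATMPIAIJ` matrices with compatible layouts; the index sets and `B_seq` are kept so the call can be repeated with `MAT_REUSE_MATRIX`:
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq = NULL;

  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
  /* ... after the values of B change (same nonzero pattern) ... */
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve
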
5788 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5789 @*/
5790 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5791 {
5792   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5793   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5794   IS          isrowb, iscolb;
5795   Mat        *bseq = NULL;
5796 
5797   PetscFunctionBegin;
5798   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5799              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5800   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5801 
5802   if (scall == MAT_INITIAL_MATRIX) {
5803     start = A->cmap->rstart;
5804     cmap  = a->garray;
5805     nzA   = a->A->cmap->n;
5806     nzB   = a->B->cmap->n;
5807     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5808     ncols = 0;
5809     for (i = 0; i < nzB; i++) { /* row < local row index */
5810       if (cmap[i] < start) idx[ncols++] = cmap[i];
5811       else break;
5812     }
5813     imark = i;
5814     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5815     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5816     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5817     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5818   } else {
5819     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5820     isrowb = *rowb;
5821     iscolb = *colb;
5822     PetscCall(PetscMalloc1(1, &bseq));
5823     bseq[0] = *B_seq;
5824   }
5825   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5826   *B_seq = bseq[0];
5827   PetscCall(PetscFree(bseq));
5828   if (!rowb) {
5829     PetscCall(ISDestroy(&isrowb));
5830   } else {
5831     *rowb = isrowb;
5832   }
5833   if (!colb) {
5834     PetscCall(ISDestroy(&iscolb));
5835   } else {
5836     *colb = iscolb;
5837   }
5838   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5839   PetscFunctionReturn(PETSC_SUCCESS);
5840 }
5841 
5842 /*
5843     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
5844     of the OFF-DIAGONAL portion of local A
5845 
5846     Collective
5847 
5848    Input Parameters:
5849 +    A,B - the matrices in `MATMPIAIJ` format
5850 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5851 
5852    Output Parameters:
5853 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5854 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5855 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5856 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5857 
5858     Developer Note:
5859     This directly accesses information inside the VecScatter associated with the matrix-vector product
5860      for this matrix. This is not desirable.
5861 
5862     Level: developer
5863 
5864 */
5865 
5866 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5867 {
5868   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5869   VecScatter         ctx;
5870   MPI_Comm           comm;
5871   const PetscMPIInt *rprocs, *sprocs;
5872   PetscMPIInt        nrecvs, nsends;
5873   const PetscInt    *srow, *rstarts, *sstarts;
5874   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5875   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5876   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5877   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5878   PetscMPIInt        size, tag, rank, nreqs;
5879 
5880   PetscFunctionBegin;
5881   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5882   PetscCallMPI(MPI_Comm_size(comm, &size));
5883 
5884   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5885              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5886   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5887   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5888 
5889   if (size == 1) {
5890     startsj_s = NULL;
5891     bufa_ptr  = NULL;
5892     *B_oth    = NULL;
5893     PetscFunctionReturn(PETSC_SUCCESS);
5894   }
5895 
5896   ctx = a->Mvctx;
5897   tag = ((PetscObject)ctx)->tag;
5898 
5899   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5900   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5901   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5902   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5903   PetscCall(PetscMalloc1(nreqs, &reqs));
5904   rwaits = reqs;
5905   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5906 
5907   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5908   if (scall == MAT_INITIAL_MATRIX) {
5909     /* i-array */
5910     /*  post receives */
5911     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5912     for (i = 0; i < nrecvs; i++) {
5913       rowlen = rvalues + rstarts[i] * rbs;
5914       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5915       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5916     }
5917 
5918     /* pack the outgoing message */
5919     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5920 
5921     sstartsj[0] = 0;
5922     rstartsj[0] = 0;
5923     len         = 0; /* total length of j or a array to be sent */
5924     if (nsends) {
5925       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5926       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5927     }
5928     for (i = 0; i < nsends; i++) {
5929       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5930       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5931       for (j = 0; j < nrows; j++) {
5932         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5933         for (l = 0; l < sbs; l++) {
5934           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5935 
5936           rowlen[j * sbs + l] = ncols;
5937 
5938           len += ncols;
5939           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5940         }
5941         k++;
5942       }
5943       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5944 
5945       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5946     }
5947     /* recvs and sends of i-array are completed */
5948     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5949     PetscCall(PetscFree(svalues));
5950 
5951     /* allocate buffers for sending j and a arrays */
5952     PetscCall(PetscMalloc1(len + 1, &bufj));
5953     PetscCall(PetscMalloc1(len + 1, &bufa));
5954 
5955     /* create i-array of B_oth */
5956     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5957 
5958     b_othi[0] = 0;
5959     len       = 0; /* total length of j or a array to be received */
5960     k         = 0;
5961     for (i = 0; i < nrecvs; i++) {
5962       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5963       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5964       for (j = 0; j < nrows; j++) {
5965         b_othi[k + 1] = b_othi[k] + rowlen[j];
5966         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5967         k++;
5968       }
5969       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5970     }
5971     PetscCall(PetscFree(rvalues));
5972 
5973     /* allocate space for j and a arrays of B_oth */
5974     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5975     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5976 
5977     /* j-array */
5978     /*  post receives of j-array */
5979     for (i = 0; i < nrecvs; i++) {
5980       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5981       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5982     }
5983 
5984     /* pack the outgoing message j-array */
5985     if (nsends) k = sstarts[0];
5986     for (i = 0; i < nsends; i++) {
5987       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5988       bufJ  = bufj + sstartsj[i];
5989       for (j = 0; j < nrows; j++) {
5990         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5991         for (ll = 0; ll < sbs; ll++) {
5992           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5993           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5994           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5995         }
5996       }
5997       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5998     }
5999 
6000     /* recvs and sends of j-array are completed */
6001     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6002   } else if (scall == MAT_REUSE_MATRIX) {
6003     sstartsj = *startsj_s;
6004     rstartsj = *startsj_r;
6005     bufa     = *bufa_ptr;
6006     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
6007   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
6008 
6009   /* a-array */
6010   /*  post receives of a-array */
6011   for (i = 0; i < nrecvs; i++) {
6012     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
6013     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
6014   }
6015 
6016   /* pack the outgoing message a-array */
6017   if (nsends) k = sstarts[0];
6018   for (i = 0; i < nsends; i++) {
6019     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6020     bufA  = bufa + sstartsj[i];
6021     for (j = 0; j < nrows; j++) {
6022       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6023       for (ll = 0; ll < sbs; ll++) {
6024         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6025         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6026         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6027       }
6028     }
6029     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6030   }
6031   /* recvs and sends of a-array are completed */
6032   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6033   PetscCall(PetscFree(reqs));
6034 
6035   if (scall == MAT_INITIAL_MATRIX) {
6036     Mat_SeqAIJ *b_oth;
6037 
6038     /* put together the new matrix */
6039     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6040 
6041     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6042     /* Since these are PETSc arrays, change flags to free them as necessary. */
6043     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6044     b_oth->free_a  = PETSC_TRUE;
6045     b_oth->free_ij = PETSC_TRUE;
6046     b_oth->nonew   = 0;
6047 
6048     PetscCall(PetscFree(bufj));
6049     if (!startsj_s || !bufa_ptr) {
6050       PetscCall(PetscFree2(sstartsj, rstartsj));
6051       PetscCall(PetscFree(bufa_ptr));
6052     } else {
6053       *startsj_s = sstartsj;
6054       *startsj_r = rstartsj;
6055       *bufa_ptr  = bufa;
6056     }
6057   } else if (scall == MAT_REUSE_MATRIX) {
6058     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6059   }
6060 
6061   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6062   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6063   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6064   PetscFunctionReturn(PETSC_SUCCESS);
6065 }
6066 
6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6070 #if defined(PETSC_HAVE_MKL_SPARSE)
6071 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6072 #endif
6073 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6075 #if defined(PETSC_HAVE_ELEMENTAL)
6076 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6077 #endif
6078 #if defined(PETSC_HAVE_SCALAPACK)
6079 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6080 #endif
6081 #if defined(PETSC_HAVE_HYPRE)
6082 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6083 #endif
6084 #if defined(PETSC_HAVE_CUDA)
6085 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6086 #endif
6087 #if defined(PETSC_HAVE_HIP)
6088 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6089 #endif
6090 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6091 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6092 #endif
6093 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6094 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6095 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6096 
6097 /*
6098     Computes (B'*A')' since computing A*B directly is untenable
6099 
6100                n                       p                          p
6101         [             ]       [             ]         [                 ]
6102       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6103         [             ]       [             ]         [                 ]
6104 
6105 */
6106 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6107 {
6108   Mat At, Bt, Ct;
6109 
6110   PetscFunctionBegin;
6111   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6112   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6113   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6114   PetscCall(MatDestroy(&At));
6115   PetscCall(MatDestroy(&Bt));
6116   PetscCall(MatTransposeSetPrecursor(Ct, C));
6117   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6118   PetscCall(MatDestroy(&Ct));
6119   PetscFunctionReturn(PETSC_SUCCESS);
6120 }
6121 
6122 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6123 {
6124   PetscBool cisdense;
6125 
6126   PetscFunctionBegin;
6127   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6128   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6129   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6130   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6131   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6132   PetscCall(MatSetUp(C));
6133 
6134   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6135   PetscFunctionReturn(PETSC_SUCCESS);
6136 }
6137 
6138 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6139 {
6140   Mat_Product *product = C->product;
6141   Mat          A = product->A, B = product->B;
6142 
6143   PetscFunctionBegin;
6144   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6145              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6146   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6147   C->ops->productsymbolic = MatProductSymbolic_AB;
6148   PetscFunctionReturn(PETSC_SUCCESS);
6149 }
6150 
6151 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6152 {
6153   Mat_Product *product = C->product;
6154 
6155   PetscFunctionBegin;
6156   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6157   PetscFunctionReturn(PETSC_SUCCESS);
6158 }
6159 
6160 /*
6161    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6162 
6163   Input Parameters:
6164 
6165     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6166     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6167 
6168     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6169 
6170     For Set1, j1[] contains column indices of the nonzeros.
6171     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6172     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6173     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6174 
6175     Similar for Set2.
6176 
6177     This routine merges the two sets of nonzeros row by row and removes repeats.
6178 
6179   Output Parameters: (memory is allocated by the caller)
6180 
6181     i[],j[]: the CSR of the merged matrix, which has m rows.
6182     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6183     imap2[]: similar to imap1[], but for Set2.
6184     Note we order nonzeros row-by-row and from left to right.
6185     Note we order nonzeros row-by-row and from left to right. */
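/*
  A hypothetical tiny illustration of the conventions above (one row, m = 1); the numbers are made up for this sketch:
    Set1: j1 = {1,1,4}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3}   (column 1 appears twice, column 4 once)
    Set2: j2 = {2,4},   rowBegin2 = {0}, rowEnd2 = {2}, jmap2 = {0,1,2}
  The merged CSR is i = {0,3}, j = {1,2,4}, with imap1 = {0,2} and imap2 = {1,2}.
*/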
6186 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6187 {
6188   PetscInt   r, m; /* Row index of mat */
6189   PetscCount t, t1, t2, b1, e1, b2, e2;
6190 
6191   PetscFunctionBegin;
6192   PetscCall(MatGetLocalSize(mat, &m, NULL));
6193   t1 = t2 = t = 0; /* Count of unique nonzeros in Set1, Set2 and the merged set, respectively */
6194   i[0]        = 0;
6195   for (r = 0; r < m; r++) { /* Do row by row merging */
6196     b1 = rowBegin1[r];
6197     e1 = rowEnd1[r];
6198     b2 = rowBegin2[r];
6199     e2 = rowEnd2[r];
6200     while (b1 < e1 && b2 < e2) {
6201       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6202         j[t]      = j1[b1];
6203         imap1[t1] = t;
6204         imap2[t2] = t;
6205         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6206         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique nonzero of Set1 */
6207         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique nonzero of Set2 */
6208         t2++;
6209         t++;
6210       } else if (j1[b1] < j2[b2]) {
6211         j[t]      = j1[b1];
6212         imap1[t1] = t;
6213         b1 += jmap1[t1 + 1] - jmap1[t1];
6214         t1++;
6215         t++;
6216       } else {
6217         j[t]      = j2[b2];
6218         imap2[t2] = t;
6219         b2 += jmap2[t2 + 1] - jmap2[t2];
6220         t2++;
6221         t++;
6222       }
6223     }
6224     /* Merge the remaining in either j1[] or j2[] */
6225     while (b1 < e1) {
6226       j[t]      = j1[b1];
6227       imap1[t1] = t;
6228       b1 += jmap1[t1 + 1] - jmap1[t1];
6229       t1++;
6230       t++;
6231     }
6232     while (b2 < e2) {
6233       j[t]      = j2[b2];
6234       imap2[t2] = t;
6235       b2 += jmap2[t2 + 1] - jmap2[t2];
6236       t2++;
6237       t++;
6238     }
6239     PetscCall(PetscIntCast(t, i + r + 1));
6240   }
6241   PetscFunctionReturn(PETSC_SUCCESS);
6242 }
6243 
6244 /*
6245   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6246 
6247   Input Parameters:
6248     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6249     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6250       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6251 
6252       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6253       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6254 
6255   Output Parameters:
6256     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6257     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6258       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6259       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6260 
6261     Aperm[],Ajmap[],Atot,Annz: Aperm[] and Ajmap[] are allocated by this routine; Atot and Annz are returned counts.
6262       Atot: number of entries belonging to the diagonal block.
6263       Annz: number of unique nonzeros belonging to the diagonal block.
6264       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Its length is Atot, which counts
6265         repeats (i.e., entries with the same 'i,j' pair) separately.
6266       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6267         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6271 
6272     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6273 
6274     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
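
    Example (an illustrative sketch with made-up data): assume rstart=0, rend=1, cstart=0, cend=4 and one row with
    n = 5 entries i[] = [0,0,0,0,0], j[] = [5,1,1,7,3], perm[] = [0,1,2,3,4]. After the per-row sort the diagonal-block
    columns come first, j[] = [1,1,3,5,7], with rowBegin[0] = 0, rowMid[0] = 3, rowEnd[0] = 5, and
      Atot = 3, Annz = 2, Ajmap = [0,2,3], Aperm = [1,2,4]  (input positions of the two 1's and the 3)
      Btot = 2, Bnnz = 2, Bjmap = [0,1,2], Bperm = [0,3]    (input positions of the 5 and the 7)
    (The order within Aperm/Bperm for a repeated column depends on the sort and is not guaranteed.)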
6275 */
6276 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6277 {
6278   PetscInt    cstart, cend, rstart, rend, row, col;
6279   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6280   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6281   PetscCount  k, m, p, q, r, s, mid;
6282   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6283 
6284   PetscFunctionBegin;
6285   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6286   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6287   m = rend - rstart;
6288 
6289   /* Skip negative rows */
6290   for (k = 0; k < n; k++)
6291     if (i[k] >= 0) break;
6292 
6293   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6294      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6295   */
6296   while (k < n) {
6297     row = i[k];
6298     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6299     for (s = k; s < n; s++)
6300       if (i[s] != row) break;
6301 
6302     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6303     for (p = k; p < s; p++) {
6304       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6305       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6306     }
6307     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6308     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6309     rowBegin[row - rstart] = k;
6310     rowMid[row - rstart]   = mid;
6311     rowEnd[row - rstart]   = s;
6312 
6313     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6314     Atot += mid - k;
6315     Btot += s - mid;
6316 
6317     /* Count unique nonzeros of this diag row */
6318     for (p = k; p < mid;) {
6319       col = j[p];
6320       do {
6321         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6322         p++;
6323       } while (p < mid && j[p] == col);
6324       Annz++;
6325     }
6326 
6327     /* Count unique nonzeros of this offdiag row */
6328     for (p = mid; p < s;) {
6329       col = j[p];
6330       do {
6331         p++;
6332       } while (p < s && j[p] == col);
6333       Bnnz++;
6334     }
6335     k = s;
6336   }
6337 
6338   /* Allocation according to Atot, Btot, Annz, Bnnz */
6339   PetscCall(PetscMalloc1(Atot, &Aperm));
6340   PetscCall(PetscMalloc1(Btot, &Bperm));
6341   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6342   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6343 
6344   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6345   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6346   for (r = 0; r < m; r++) {
6347     k   = rowBegin[r];
6348     mid = rowMid[r];
6349     s   = rowEnd[r];
6350     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6351     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6352     Atot += mid - k;
6353     Btot += s - mid;
6354 
6355     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6356     for (p = k; p < mid;) {
6357       col = j[p];
6358       q   = p;
6359       do {
6360         p++;
6361       } while (p < mid && j[p] == col);
6362       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6363       Annz++;
6364     }
6365 
6366     for (p = mid; p < s;) {
6367       col = j[p];
6368       q   = p;
6369       do {
6370         p++;
6371       } while (p < s && j[p] == col);
6372       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6373       Bnnz++;
6374     }
6375   }
6376   /* Output */
6377   *Aperm_ = Aperm;
6378   *Annz_  = Annz;
6379   *Atot_  = Atot;
6380   *Ajmap_ = Ajmap;
6381   *Bperm_ = Bperm;
6382   *Bnnz_  = Bnnz;
6383   *Btot_  = Btot;
6384   *Bjmap_ = Bjmap;
6385   PetscFunctionReturn(PETSC_SUCCESS);
6386 }
6387 
6388 /*
6389   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6390 
6391   Input Parameters:
6392     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6393     nnz:  number of unique nonzeros in the merged matrix
6394     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6395     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6396 
6397   Output Parameter: (memory is allocated by the caller)
6398     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6399 
6400   Example:
6401     nnz1 = 4
6402     nnz  = 6
6403     imap = [1,3,4,5]
6404     jmap = [0,3,5,6,7]
6405    then,
6406     jmap_new = [0,0,3,3,5,6,7]
6407 */
6408 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6409 {
6410   PetscCount k, p;
6411 
6412   PetscFunctionBegin;
6413   jmap_new[0] = 0;
6414   p           = nnz;                /* p loops over jmap_new[] backwards */
6415   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6416     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6417   }
6418   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6419   PetscFunctionReturn(PETSC_SUCCESS);
6420 }
6421 
6422 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6423 {
6424   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6425 
6426   PetscFunctionBegin;
6427   PetscCall(PetscSFDestroy(&coo->sf));
6428   PetscCall(PetscFree(coo->Aperm1));
6429   PetscCall(PetscFree(coo->Bperm1));
6430   PetscCall(PetscFree(coo->Ajmap1));
6431   PetscCall(PetscFree(coo->Bjmap1));
6432   PetscCall(PetscFree(coo->Aimap2));
6433   PetscCall(PetscFree(coo->Bimap2));
6434   PetscCall(PetscFree(coo->Aperm2));
6435   PetscCall(PetscFree(coo->Bperm2));
6436   PetscCall(PetscFree(coo->Ajmap2));
6437   PetscCall(PetscFree(coo->Bjmap2));
6438   PetscCall(PetscFree(coo->Cperm1));
6439   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6440   PetscCall(PetscFree(coo));
6441   PetscFunctionReturn(PETSC_SUCCESS);
6442 }
6443 
6444 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6445 {
6446   MPI_Comm             comm;
6447   PetscMPIInt          rank, size;
6448   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6449   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6450   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6451   PetscContainer       container;
6452   MatCOOStruct_MPIAIJ *coo;
6453 
6454   PetscFunctionBegin;
6455   PetscCall(PetscFree(mpiaij->garray));
6456   PetscCall(VecDestroy(&mpiaij->lvec));
6457 #if defined(PETSC_USE_CTABLE)
6458   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6459 #else
6460   PetscCall(PetscFree(mpiaij->colmap));
6461 #endif
6462   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6463   mat->assembled     = PETSC_FALSE;
6464   mat->was_assembled = PETSC_FALSE;
6465 
6466   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6467   PetscCallMPI(MPI_Comm_size(comm, &size));
6468   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6469   PetscCall(PetscLayoutSetUp(mat->rmap));
6470   PetscCall(PetscLayoutSetUp(mat->cmap));
6471   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6472   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6473   PetscCall(MatGetLocalSize(mat, &m, &n));
6474   PetscCall(MatGetSize(mat, &M, &N));
6475 
6476   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6477   /* entries come first, then local rows, then remote rows.                     */
6478   PetscCount n1 = coo_n, *perm1;
6479   PetscInt  *i1 = coo_i, *j1 = coo_j;
6480 
6481   PetscCall(PetscMalloc1(n1, &perm1));
6482   for (k = 0; k < n1; k++) perm1[k] = k;
6483 
6484   /* Manipulate indices so that entries with negative row or col indices will have smallest
6485      row indices, local entries will have greater but negative row indices, and remote entries
6486      will have positive row indices.
6487   */
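  /* For illustration only (hypothetical ranges): with rstart = 100 and rend = 200, an entry in local row 150
     becomes 150 - PETSC_INT_MAX (negative, but still greater than PETSC_INT_MIN), an entry with a negative row
     or column index becomes PETSC_INT_MIN, and a remote row such as 250 keeps its positive value, so a single
     sort by row yields the order: ignored entries, local rows, remote rows. */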
6488   for (k = 0; k < n1; k++) {
6489     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6490     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6491     else {
6492       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but there are entries destined for remote rows");
6493       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6494     }
6495   }
6496 
6497   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6498   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6499 
6500   /* Advance k to the first entry we need to take care of */
6501   for (k = 0; k < n1; k++)
6502     if (i1[k] > PETSC_INT_MIN) break;
6503   PetscCount i1start = k;
6504 
6505   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem points just past the last entry with a local row */
6506   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6507 
6508   /*           Send remote rows to their owner                                  */
6509   /* Find which rows should be sent to which remote ranks*/
6510   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6511   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6512   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6513   const PetscInt *ranges;
6514   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6515 
6516   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6517   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6518   for (k = rem; k < n1;) {
6519     PetscMPIInt owner;
6520     PetscInt    firstRow, lastRow;
6521 
6522     /* Locate a row range */
6523     firstRow = i1[k]; /* first row of this owner */
6524     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6525     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6526 
6527     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6528     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6529 
6530     /* All entries in [k,p) belong to this remote owner */
6531     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6532       PetscMPIInt *sendto2;
6533       PetscInt    *nentries2;
6534       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6535 
6536       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6537       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6538       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6539       PetscCall(PetscFree2(sendto, nentries));
6540       sendto   = sendto2;
6541       nentries = nentries2;
6542       maxNsend = maxNsend2;
6543     }
6544     sendto[nsend] = owner;
6545     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6546     nsend++;
6547     k = p;
6548   }
6549 
6550   /* Build 1st SF to know offsets on remote to send data */
6551   PetscSF      sf1;
6552   PetscInt     nroots = 1, nroots2 = 0;
6553   PetscInt     nleaves = nsend, nleaves2 = 0;
6554   PetscInt    *offsets;
6555   PetscSFNode *iremote;
6556 
6557   PetscCall(PetscSFCreate(comm, &sf1));
6558   PetscCall(PetscMalloc1(nsend, &iremote));
6559   PetscCall(PetscMalloc1(nsend, &offsets));
6560   for (k = 0; k < nsend; k++) {
6561     iremote[k].rank  = sendto[k];
6562     iremote[k].index = 0;
6563     nleaves2 += nentries[k];
6564     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6565   }
6566   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6567   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6568   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6569   PetscCall(PetscSFDestroy(&sf1));
6570   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6571 
6572   /* Build 2nd SF to send remote COOs to their owner */
6573   PetscSF sf2;
6574   nroots  = nroots2;
6575   nleaves = nleaves2;
6576   PetscCall(PetscSFCreate(comm, &sf2));
6577   PetscCall(PetscSFSetFromOptions(sf2));
6578   PetscCall(PetscMalloc1(nleaves, &iremote));
6579   p = 0;
6580   for (k = 0; k < nsend; k++) {
6581     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6582     for (q = 0; q < nentries[k]; q++, p++) {
6583       iremote[p].rank = sendto[k];
6584       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6585     }
6586   }
6587   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6588 
6589   /* Send the remote COOs to their owner */
6590   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6591   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6592   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6593   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6594   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6595   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6596   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6597   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6598   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6599   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6600   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6601 
6602   PetscCall(PetscFree(offsets));
6603   PetscCall(PetscFree2(sendto, nentries));
6604 
6605   /* Sort received COOs by row along with the permutation array     */
6606   for (k = 0; k < n2; k++) perm2[k] = k;
6607   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6608 
6609   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6610   PetscCount *Cperm1;
6611   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6612   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6613   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6614   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6615 
6616   /* Support for HYPRE matrices, kind of a hack.
6617      Swap min column with diagonal so that diagonal values will go first */
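  /* For illustration only (hypothetical data): suppose a row with local index rindex owns diagonal column
     d = rindex + cstart and its smallest diagonal-block column among the COO entries is c < d. If the row
     actually has a diagonal entry (tracked in hasdiag below), every occurrence of c is relabeled d and every
     occurrence of d is relabeled c, so that after the subsequent per-row sort the diagonal value is the first
     nonzero of the row, as hypre expects. */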
6618   PetscBool hypre;
6619   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6620   if (hypre) {
6621     PetscInt *minj;
6622     PetscBT   hasdiag;
6623 
6624     PetscCall(PetscBTCreate(m, &hasdiag));
6625     PetscCall(PetscMalloc1(m, &minj));
6626     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6627     for (k = i1start; k < rem; k++) {
6628       if (j1[k] < cstart || j1[k] >= cend) continue;
6629       const PetscInt rindex = i1[k] - rstart;
6630       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6631       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6632     }
6633     for (k = 0; k < n2; k++) {
6634       if (j2[k] < cstart || j2[k] >= cend) continue;
6635       const PetscInt rindex = i2[k] - rstart;
6636       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6637       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6638     }
6639     for (k = i1start; k < rem; k++) {
6640       const PetscInt rindex = i1[k] - rstart;
6641       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6642       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6643       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6644     }
6645     for (k = 0; k < n2; k++) {
6646       const PetscInt rindex = i2[k] - rstart;
6647       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6648       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6649       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6650     }
6651     PetscCall(PetscBTDestroy(&hasdiag));
6652     PetscCall(PetscFree(minj));
6653   }
6654 
6655   /* Split local COOs and received COOs into diag/offdiag portions */
6656   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6657   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6658   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6659   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6660   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6661   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6662 
6663   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6664   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6665   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6666   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6667 
6668   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6669   PetscInt *Ai, *Bi;
6670   PetscInt *Aj, *Bj;
6671 
6672   PetscCall(PetscMalloc1(m + 1, &Ai));
6673   PetscCall(PetscMalloc1(m + 1, &Bi));
6674   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6675   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6676 
6677   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6678   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6679   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6680   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6681   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6682 
6683   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6684   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6685 
6686   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6687   /* expect most nonzeros in A/B to have local contributing entries            */
6688   PetscInt    Annz = Ai[m];
6689   PetscInt    Bnnz = Bi[m];
6690   PetscCount *Ajmap1_new, *Bjmap1_new;
6691 
6692   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6693   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6694 
6695   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6696   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6697 
6698   PetscCall(PetscFree(Aimap1));
6699   PetscCall(PetscFree(Ajmap1));
6700   PetscCall(PetscFree(Bimap1));
6701   PetscCall(PetscFree(Bjmap1));
6702   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6703   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6704   PetscCall(PetscFree(perm1));
6705   PetscCall(PetscFree3(i2, j2, perm2));
6706 
6707   Ajmap1 = Ajmap1_new;
6708   Bjmap1 = Bjmap1_new;
6709 
6710   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6711   if (Annz < Annz1 + Annz2) {
6712     PetscInt *Aj_new;
6713     PetscCall(PetscMalloc1(Annz, &Aj_new));
6714     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6715     PetscCall(PetscFree(Aj));
6716     Aj = Aj_new;
6717   }
6718 
6719   if (Bnnz < Bnnz1 + Bnnz2) {
6720     PetscInt *Bj_new;
6721     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6722     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6723     PetscCall(PetscFree(Bj));
6724     Bj = Bj_new;
6725   }
6726 
6727   /* Create new submatrices for on-process and off-process coupling                  */
6728   PetscScalar     *Aa, *Ba;
6729   MatType          rtype;
6730   Mat_SeqAIJ      *a, *b;
6731   PetscObjectState state;
6732   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6733   PetscCall(PetscCalloc1(Bnnz, &Ba));
6734   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6735   if (cstart) {
6736     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6737   }
6738 
6739   PetscCall(MatGetRootType_Private(mat, &rtype));
6740 
6741   MatSeqXAIJGetOptions_Private(mpiaij->A);
6742   PetscCall(MatDestroy(&mpiaij->A));
6743   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6744   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6745   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6746 
6747   MatSeqXAIJGetOptions_Private(mpiaij->B);
6748   PetscCall(MatDestroy(&mpiaij->B));
6749   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6750   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6751   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6752 
6753   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6754   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6755   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6756   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6757 
6758   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6759   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6760   a->free_a  = PETSC_TRUE;
6761   a->free_ij = PETSC_TRUE;
6762   b->free_a  = PETSC_TRUE;
6763   b->free_ij = PETSC_TRUE;
6764   a->maxnz   = a->nz;
6765   b->maxnz   = b->nz;
6766 
6767   /* conversion must happen AFTER multiply setup */
6768   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6769   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6770   PetscCall(VecDestroy(&mpiaij->lvec));
6771   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6772 
6773   // Put the COO struct in a container and then attach that to the matrix
6774   PetscCall(PetscMalloc1(1, &coo));
6775   coo->n       = coo_n;
6776   coo->sf      = sf2;
6777   coo->sendlen = nleaves;
6778   coo->recvlen = nroots;
6779   coo->Annz    = Annz;
6780   coo->Bnnz    = Bnnz;
6781   coo->Annz2   = Annz2;
6782   coo->Bnnz2   = Bnnz2;
6783   coo->Atot1   = Atot1;
6784   coo->Atot2   = Atot2;
6785   coo->Btot1   = Btot1;
6786   coo->Btot2   = Btot2;
6787   coo->Ajmap1  = Ajmap1;
6788   coo->Aperm1  = Aperm1;
6789   coo->Bjmap1  = Bjmap1;
6790   coo->Bperm1  = Bperm1;
6791   coo->Aimap2  = Aimap2;
6792   coo->Ajmap2  = Ajmap2;
6793   coo->Aperm2  = Aperm2;
6794   coo->Bimap2  = Bimap2;
6795   coo->Bjmap2  = Bjmap2;
6796   coo->Bperm2  = Bperm2;
6797   coo->Cperm1  = Cperm1;
6798   // Allocate in preallocation. If not used, it has zero cost on host
6799   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6800   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6801   PetscCall(PetscContainerSetPointer(container, coo));
6802   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6803   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6804   PetscCall(PetscContainerDestroy(&container));
6805   PetscFunctionReturn(PETSC_SUCCESS);
6806 }
6807 
6808 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6809 {
6810   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6811   Mat                  A = mpiaij->A, B = mpiaij->B;
6812   PetscScalar         *Aa, *Ba;
6813   PetscScalar         *sendbuf, *recvbuf;
6814   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6815   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6816   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6817   const PetscCount    *Cperm1;
6818   PetscContainer       container;
6819   MatCOOStruct_MPIAIJ *coo;
6820 
6821   PetscFunctionBegin;
6822   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6823   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6824   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6825   sendbuf = coo->sendbuf;
6826   recvbuf = coo->recvbuf;
6827   Ajmap1  = coo->Ajmap1;
6828   Ajmap2  = coo->Ajmap2;
6829   Aimap2  = coo->Aimap2;
6830   Bjmap1  = coo->Bjmap1;
6831   Bjmap2  = coo->Bjmap2;
6832   Bimap2  = coo->Bimap2;
6833   Aperm1  = coo->Aperm1;
6834   Aperm2  = coo->Aperm2;
6835   Bperm1  = coo->Bperm1;
6836   Bperm2  = coo->Bperm2;
6837   Cperm1  = coo->Cperm1;
6838 
6839   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6840   PetscCall(MatSeqAIJGetArray(B, &Ba));
6841 
6842   /* Pack entries to be sent to remote */
6843   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6844 
6845   /* Send remote entries to their owner and overlap the communication with local computation */
6846   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6847   /* Add local entries to A and B */
6848   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6849     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6850     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6851     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6852   }
6853   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6854     PetscScalar sum = 0.0;
6855     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6856     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6857   }
6858   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6859 
6860   /* Add received remote entries to A and B */
6861   for (PetscCount i = 0; i < coo->Annz2; i++) {
6862     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6863   }
6864   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6865     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6866   }
6867   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6868   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6869   PetscFunctionReturn(PETSC_SUCCESS);
6870 }
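
/*
  A minimal user-level sketch (with illustrative sizes and indices) of how the COO path implemented by
  MatSetPreallocationCOO_MPIAIJ() and MatSetValuesCOO_MPIAIJ() above is typically driven through the
  public interface; see MatSetPreallocationCOO() and MatSetValuesCOO() for the definitive API:

    Mat         A;
    PetscInt    coo_i[] = {0, 0, 1};        // global row indices; repeated (i,j) pairs are allowed
    PetscInt    coo_j[] = {0, 2, 1};        // global column indices, matching coo_i pairwise
    PetscScalar coo_v[] = {1.0, 2.0, 3.0};  // values, one per (i,j) pair

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 4, 4));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatSetPreallocationCOO(A, 3, coo_i, coo_j)); // builds the maps, permutations and SF used above
    PetscCall(MatSetValuesCOO(A, coo_v, INSERT_VALUES));   // values for repeated (i,j) pairs are summed
    PetscCall(MatDestroy(&A));
*/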
6871 
6872 /*MC
6873    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6874 
6875    Options Database Keys:
6876 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6877 
6878    Level: beginner
6879 
6880    Notes:
6881    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6882     in this case the values associated with the rows and columns one passes in are set to zero
6883     in the matrix.
6884 
6885     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6886     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6887 
6888 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6889 M*/
6890 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6891 {
6892   Mat_MPIAIJ *b;
6893   PetscMPIInt size;
6894 
6895   PetscFunctionBegin;
6896   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6897 
6898   PetscCall(PetscNew(&b));
6899   B->data       = (void *)b;
6900   B->ops[0]     = MatOps_Values;
6901   B->assembled  = PETSC_FALSE;
6902   B->insertmode = NOT_SET_VALUES;
6903   b->size       = size;
6904 
6905   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6906 
6907   /* build cache for off array entries formed */
6908   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6909 
6910   b->donotstash  = PETSC_FALSE;
6911   b->colmap      = NULL;
6912   b->garray      = NULL;
6913   b->roworiented = PETSC_TRUE;
6914 
6915   /* stuff used for matrix vector multiply */
6916   b->lvec  = NULL;
6917   b->Mvctx = NULL;
6918 
6919   /* stuff for MatGetRow() */
6920   b->rowindices   = NULL;
6921   b->rowvalues    = NULL;
6922   b->getrowactive = PETSC_FALSE;
6923 
6924   /* flexible pointer used in CUSPARSE classes */
6925   b->spptr = NULL;
6926 
6927   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6928   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6929   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6930   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6931   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6932   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6933   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6934   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6935   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6936   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6937   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6938 #if defined(PETSC_HAVE_CUDA)
6939   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6940 #endif
6941 #if defined(PETSC_HAVE_HIP)
6942   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6943 #endif
6944 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6945   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6946 #endif
6947 #if defined(PETSC_HAVE_MKL_SPARSE)
6948   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6949 #endif
6950   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6951   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6952   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6953   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6954 #if defined(PETSC_HAVE_ELEMENTAL)
6955   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6956 #endif
6957 #if defined(PETSC_HAVE_SCALAPACK)
6958   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6959 #endif
6960   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6961   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6962 #if defined(PETSC_HAVE_HYPRE)
6963   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6964   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6965 #endif
6966   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6967   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6968   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6969   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6970   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6971   PetscFunctionReturn(PETSC_SUCCESS);
6972 }
6973 
6974 /*@
6975   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6976   and "off-diagonal" part of the matrix in CSR format.
6977 
6978   Collective
6979 
6980   Input Parameters:
6981 + comm - MPI communicator
6982 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6983 . n    - This value should be the same as the local size used in creating the
6984          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have
6985          it calculated if `N` is given). For square matrices `n` is almost always `m`.
6986 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6987 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6988 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6989 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6990 . a    - matrix values
6991 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6992 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6993 - oa   - matrix values
6994 
6995   Output Parameter:
6996 . mat - the matrix
6997 
6998   Level: advanced
6999 
7000   Notes:
7001   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
7002   must free the arrays once the matrix has been destroyed and not before.
7003 
7004   The `i` and `j` indices are 0 based
7005 
7006   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
7007 
7008   This sets local rows and cannot be used to set off-processor values.
7009 
7010   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
7011   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
7012   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
7013   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
7014   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
7015   communication if it is known that only local entries will be set.
7016 
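  Example usage (a sketch of what rank 0 of a two-process run might pass; the array values are illustrative only):
.vb
  PetscInt    i[]  = {0, 1, 2}, j[]  = {0, 1}; // one diagonal-block entry per row, local column indices
  PetscScalar a[]  = {1.0, 2.0};
  PetscInt    oi[] = {0, 1, 1}, oj[] = {3};    // one off-diagonal entry in row 0, global column index
  PetscScalar oa[] = {5.0};
  Mat         A;

  PetscCall(MatCreateMPIAIJWithSplitArrays(comm, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
.ve
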
7017 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
7018           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
7019 @*/
7020 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
7021 {
7022   Mat_MPIAIJ *maij;
7023 
7024   PetscFunctionBegin;
7025   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
7026   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
7027   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
7028   PetscCall(MatCreate(comm, mat));
7029   PetscCall(MatSetSizes(*mat, m, n, M, N));
7030   PetscCall(MatSetType(*mat, MATMPIAIJ));
7031   maij = (Mat_MPIAIJ *)(*mat)->data;
7032 
7033   (*mat)->preallocated = PETSC_TRUE;
7034 
7035   PetscCall(PetscLayoutSetUp((*mat)->rmap));
7036   PetscCall(PetscLayoutSetUp((*mat)->cmap));
7037 
7038   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
7039   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
7040 
7041   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7042   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
7043   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
7044   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
7045   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
7046   PetscFunctionReturn(PETSC_SUCCESS);
7047 }
7048 
7049 typedef struct {
7050   Mat       *mp;    /* intermediate products */
7051   PetscBool *mptmp; /* is the intermediate product temporary ? */
7052   PetscInt   cp;    /* number of intermediate products */
7053 
7054   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
7055   PetscInt    *startsj_s, *startsj_r;
7056   PetscScalar *bufa;
7057   Mat          P_oth;
7058 
7059   /* may take advantage of merging product->B */
7060   Mat Bloc; /* B-local by merging diag and off-diag */
7061 
7062   /* cusparse does not support splitting between symbolic and numeric phases.
7063      When api_user is true, we don't need to update the numerical values
7064      of the temporary storage */
7065   PetscBool reusesym;
7066 
7067   /* support for COO values insertion */
7068   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
7069   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
7070   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
7071   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
7072   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
7073   PetscMemType mtype;
7074 
7075   /* customization */
7076   PetscBool abmerge;
7077   PetscBool P_oth_bind;
7078 } MatMatMPIAIJBACKEND;
7079 
7080 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
7081 {
7082   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
7083   PetscInt             i;
7084 
7085   PetscFunctionBegin;
7086   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
7087   PetscCall(PetscFree(mmdata->bufa));
7088   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7089   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7090   PetscCall(MatDestroy(&mmdata->P_oth));
7091   PetscCall(MatDestroy(&mmdata->Bloc));
7092   PetscCall(PetscSFDestroy(&mmdata->sf));
7093   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7094   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7095   PetscCall(PetscFree(mmdata->own[0]));
7096   PetscCall(PetscFree(mmdata->own));
7097   PetscCall(PetscFree(mmdata->off[0]));
7098   PetscCall(PetscFree(mmdata->off));
7099   PetscCall(PetscFree(mmdata));
7100   PetscFunctionReturn(PETSC_SUCCESS);
7101 }
7102 
7103 /* Copy selected n entries with indices in idx[] of A to v[].
7104    If idx is NULL, copy the whole data array of A to v[]
7105  */
7106 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7107 {
7108   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7109 
7110   PetscFunctionBegin;
7111   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7112   if (f) {
7113     PetscCall((*f)(A, n, idx, v));
7114   } else {
7115     const PetscScalar *vv;
7116 
7117     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7118     if (n && idx) {
7119       PetscScalar    *w  = v;
7120       const PetscInt *oi = idx;
7121       PetscInt        j;
7122 
7123       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7124     } else {
7125       PetscCall(PetscArraycpy(v, vv, n));
7126     }
7127     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7128   }
7129   PetscFunctionReturn(PETSC_SUCCESS);
7130 }
7131 
7132 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7133 {
7134   MatMatMPIAIJBACKEND *mmdata;
7135   PetscInt             i, n_d, n_o;
7136 
7137   PetscFunctionBegin;
7138   MatCheckProduct(C, 1);
7139   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7140   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7141   if (!mmdata->reusesym) { /* update temporary matrices */
7142     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7143     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7144   }
7145   mmdata->reusesym = PETSC_FALSE;
7146 
7147   for (i = 0; i < mmdata->cp; i++) {
7148     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7149     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7150   }
7151   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7152     PetscInt noff;
7153 
7154     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7155     if (mmdata->mptmp[i]) continue;
7156     if (noff) {
7157       PetscInt nown;
7158 
7159       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7160       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7161       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7162       n_o += noff;
7163       n_d += nown;
7164     } else {
7165       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7166 
7167       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7168       n_d += mm->nz;
7169     }
7170   }
7171   if (mmdata->hasoffproc) { /* offprocess insertion */
7172     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7173     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7174   }
7175   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7176   PetscFunctionReturn(PETSC_SUCCESS);
7177 }
7178 
7179 /* Support for Pt * A, A * P, or Pt * A * P */
7180 #define MAX_NUMBER_INTERMEDIATE 4
7181 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7182 {
7183   Mat_Product           *product = C->product;
7184   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7185   Mat_MPIAIJ            *a, *p;
7186   MatMatMPIAIJBACKEND   *mmdata;
7187   ISLocalToGlobalMapping P_oth_l2g = NULL;
7188   IS                     glob      = NULL;
7189   const char            *prefix;
7190   char                   pprefix[256];
7191   const PetscInt        *globidx, *P_oth_idx;
7192   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7193   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7194   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7195                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7196                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7197   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
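  /* For example (illustrative): rmapt[i] == 1 means local row r of mp[i] corresponds to global row r plus this
     process's first owned row of C, while cmapt[i] == 2 with cmapa[i] = p->garray means local column c of mp[i]
     corresponds to global column garray[c]. */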
7198 
7199   MatProductType ptype;
7200   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7201   PetscMPIInt    size;
7202 
7203   PetscFunctionBegin;
7204   MatCheckProduct(C, 1);
7205   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7206   ptype = product->type;
7207   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7208     ptype                                          = MATPRODUCT_AB;
7209     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7210   }
7211   switch (ptype) {
7212   case MATPRODUCT_AB:
7213     A          = product->A;
7214     P          = product->B;
7215     m          = A->rmap->n;
7216     n          = P->cmap->n;
7217     M          = A->rmap->N;
7218     N          = P->cmap->N;
7219     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7220     break;
7221   case MATPRODUCT_AtB:
7222     P          = product->A;
7223     A          = product->B;
7224     m          = P->cmap->n;
7225     n          = A->cmap->n;
7226     M          = P->cmap->N;
7227     N          = A->cmap->N;
7228     hasoffproc = PETSC_TRUE;
7229     break;
7230   case MATPRODUCT_PtAP:
7231     A          = product->A;
7232     P          = product->B;
7233     m          = P->cmap->n;
7234     n          = P->cmap->n;
7235     M          = P->cmap->N;
7236     N          = P->cmap->N;
7237     hasoffproc = PETSC_TRUE;
7238     break;
7239   default:
7240     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7241   }
7242   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7243   if (size == 1) hasoffproc = PETSC_FALSE;
7244 
7245   /* defaults */
7246   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7247     mp[i]    = NULL;
7248     mptmp[i] = PETSC_FALSE;
7249     rmapt[i] = -1;
7250     cmapt[i] = -1;
7251     rmapa[i] = NULL;
7252     cmapa[i] = NULL;
7253   }
7254 
7255   /* customization */
7256   PetscCall(PetscNew(&mmdata));
7257   mmdata->reusesym = product->api_user;
7258   if (ptype == MATPRODUCT_AB) {
7259     if (product->api_user) {
7260       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7261       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7262       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7263       PetscOptionsEnd();
7264     } else {
7265       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7266       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7267       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7268       PetscOptionsEnd();
7269     }
7270   } else if (ptype == MATPRODUCT_PtAP) {
7271     if (product->api_user) {
7272       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7273       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7274       PetscOptionsEnd();
7275     } else {
7276       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7277       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7278       PetscOptionsEnd();
7279     }
7280   }
7281   a = (Mat_MPIAIJ *)A->data;
7282   p = (Mat_MPIAIJ *)P->data;
7283   PetscCall(MatSetSizes(C, m, n, M, N));
7284   PetscCall(PetscLayoutSetUp(C->rmap));
7285   PetscCall(PetscLayoutSetUp(C->cmap));
7286   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7287   PetscCall(MatGetOptionsPrefix(C, &prefix));
7288 
7289   cp = 0;
7290   switch (ptype) {
7291   case MATPRODUCT_AB: /* A * P */
7292     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7293 
7294     /* A_diag * P_local (merged or not) */
7295     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7296       /* P is product->B */
7297       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7298       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7299       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7300       PetscCall(MatProductSetFill(mp[cp], product->fill));
7301       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7302       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7303       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7304       mp[cp]->product->api_user = product->api_user;
7305       PetscCall(MatProductSetFromOptions(mp[cp]));
7306       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7307       PetscCall(ISGetIndices(glob, &globidx));
7308       rmapt[cp] = 1;
7309       cmapt[cp] = 2;
7310       cmapa[cp] = globidx;
7311       mptmp[cp] = PETSC_FALSE;
7312       cp++;
7313     } else { /* A_diag * P_diag and A_diag * P_off */
7314       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7315       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7316       PetscCall(MatProductSetFill(mp[cp], product->fill));
7317       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7318       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7319       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7320       mp[cp]->product->api_user = product->api_user;
7321       PetscCall(MatProductSetFromOptions(mp[cp]));
7322       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7323       rmapt[cp] = 1;
7324       cmapt[cp] = 1;
7325       mptmp[cp] = PETSC_FALSE;
7326       cp++;
7327       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7328       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7329       PetscCall(MatProductSetFill(mp[cp], product->fill));
7330       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7331       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7332       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7333       mp[cp]->product->api_user = product->api_user;
7334       PetscCall(MatProductSetFromOptions(mp[cp]));
7335       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7336       rmapt[cp] = 1;
7337       cmapt[cp] = 2;
7338       cmapa[cp] = p->garray;
7339       mptmp[cp] = PETSC_FALSE;
7340       cp++;
7341     }
7342 
7343     /* A_off * P_other */
7344     if (mmdata->P_oth) {
7345       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7346       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7347       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7348       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7349       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7350       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7351       PetscCall(MatProductSetFill(mp[cp], product->fill));
7352       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7353       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7354       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7355       mp[cp]->product->api_user = product->api_user;
7356       PetscCall(MatProductSetFromOptions(mp[cp]));
7357       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7358       rmapt[cp] = 1;
7359       cmapt[cp] = 2;
7360       cmapa[cp] = P_oth_idx;
7361       mptmp[cp] = PETSC_FALSE;
7362       cp++;
7363     }
7364     break;
7365 
7366   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7367     /* A is product->B */
7368     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7369     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7370       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7371       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7372       PetscCall(MatProductSetFill(mp[cp], product->fill));
7373       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7374       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7375       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7376       mp[cp]->product->api_user = product->api_user;
7377       PetscCall(MatProductSetFromOptions(mp[cp]));
7378       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7379       PetscCall(ISGetIndices(glob, &globidx));
7380       rmapt[cp] = 2;
7381       rmapa[cp] = globidx;
7382       cmapt[cp] = 2;
7383       cmapa[cp] = globidx;
7384       mptmp[cp] = PETSC_FALSE;
7385       cp++;
7386     } else {
7387       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7388       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7389       PetscCall(MatProductSetFill(mp[cp], product->fill));
7390       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7391       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7392       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7393       mp[cp]->product->api_user = product->api_user;
7394       PetscCall(MatProductSetFromOptions(mp[cp]));
7395       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7396       PetscCall(ISGetIndices(glob, &globidx));
7397       rmapt[cp] = 1;
7398       cmapt[cp] = 2;
7399       cmapa[cp] = globidx;
7400       mptmp[cp] = PETSC_FALSE;
7401       cp++;
7402       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7403       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7404       PetscCall(MatProductSetFill(mp[cp], product->fill));
7405       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7406       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7407       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7408       mp[cp]->product->api_user = product->api_user;
7409       PetscCall(MatProductSetFromOptions(mp[cp]));
7410       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7411       rmapt[cp] = 2;
7412       rmapa[cp] = p->garray;
7413       cmapt[cp] = 2;
7414       cmapa[cp] = globidx;
7415       mptmp[cp] = PETSC_FALSE;
7416       cp++;
7417     }
7418     break;
7419   case MATPRODUCT_PtAP:
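    /* C = P^t * A * P is assembled as Ploc^t * A_diag * Ploc + Ploc^t * (A_off * P_oth), where Ploc merges the diagonal and off-diagonal local blocks of P */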
7420     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7421     /* P is product->B */
7422     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7423     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7424     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7425     PetscCall(MatProductSetFill(mp[cp], product->fill));
7426     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7427     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7428     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7429     mp[cp]->product->api_user = product->api_user;
7430     PetscCall(MatProductSetFromOptions(mp[cp]));
7431     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7432     PetscCall(ISGetIndices(glob, &globidx));
7433     rmapt[cp] = 2;
7434     rmapa[cp] = globidx;
7435     cmapt[cp] = 2;
7436     cmapa[cp] = globidx;
7437     mptmp[cp] = PETSC_FALSE;
7438     cp++;
7439     if (mmdata->P_oth) {
7440       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7441       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7442       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7443       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
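      /* A_off * P_oth: computed only as a temporary input for the next product (hence mptmp is set below) */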
7444       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7445       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7446       PetscCall(MatProductSetFill(mp[cp], product->fill));
7447       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7448       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7449       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7450       mp[cp]->product->api_user = product->api_user;
7451       PetscCall(MatProductSetFromOptions(mp[cp]));
7452       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7453       mptmp[cp] = PETSC_TRUE;
7454       cp++;
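      /* Ploc^t * (A_off * P_oth), reusing the temporary product mp[1] computed just above */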
7455       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7456       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7457       PetscCall(MatProductSetFill(mp[cp], product->fill));
7458       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7459       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7460       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7461       mp[cp]->product->api_user = product->api_user;
7462       PetscCall(MatProductSetFromOptions(mp[cp]));
7463       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7464       rmapt[cp] = 2;
7465       rmapa[cp] = globidx;
7466       cmapt[cp] = 2;
7467       cmapa[cp] = P_oth_idx;
7468       mptmp[cp] = PETSC_FALSE;
7469       cp++;
7470     }
7471     break;
7472   default:
7473     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7474   }
7475   /* sanity check */
7476   if (size > 1)
7477     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7478 
7479   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7480   for (i = 0; i < cp; i++) {
7481     mmdata->mp[i]    = mp[i];
7482     mmdata->mptmp[i] = mptmp[i];
7483   }
7484   mmdata->cp             = cp;
7485   C->product->data       = mmdata;
7486   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7487   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7488 
7489   /* memory type */
7490   mmdata->mtype = PETSC_MEMTYPE_HOST;
7491   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7492   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7493   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7494   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7495   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7496   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7497 
7498   /* prepare coo coordinates for values insertion */
7499 
7500   /* count the total nonzeros of the intermediate SeqAIJ matrices
7501     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7502     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be sent to remote procs
7503     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7504   */
7505   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7506     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7507     if (mptmp[cp]) continue;
7508     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7509       const PetscInt *rmap = rmapa[cp];
7510       const PetscInt  mr   = mp[cp]->rmap->n;
7511       const PetscInt  rs   = C->rmap->rstart;
7512       const PetscInt  re   = C->rmap->rend;
7513       const PetscInt *ii   = mm->i;
7514       for (i = 0; i < mr; i++) {
7515         const PetscInt gr = rmap[i];
7516         const PetscInt nz = ii[i + 1] - ii[i];
7517         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7518         else ncoo_oown += nz;                  /* this row is local */
7519       }
7520     } else ncoo_d += mm->nz;
7521   }
7522 
7523   /*
7524     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7525 
7526     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this process by other procs.
7527 
7528     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7529 
7530     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other procs
7531     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7532     so off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other procs.
7533 
7534     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7535     E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of locally inserted nonzeros, and the remaining part stores the row indices of nonzeros this process will receive.
7536   */
7537   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7538   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7539 
7540   /* gather (i,j) of nonzeros inserted by remote procs */
7541   if (hasoffproc) {
7542     PetscSF  msf;
7543     PetscInt ncoo2, *coo_i2, *coo_j2;
7544 
7545     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7546     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7547     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7548 
7549     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7550       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7551       PetscInt   *idxoff = mmdata->off[cp];
7552       PetscInt   *idxown = mmdata->own[cp];
7553       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7554         const PetscInt *rmap = rmapa[cp];
7555         const PetscInt *cmap = cmapa[cp];
7556         const PetscInt *ii   = mm->i;
7557         PetscInt       *coi  = coo_i + ncoo_o;
7558         PetscInt       *coj  = coo_j + ncoo_o;
7559         const PetscInt  mr   = mp[cp]->rmap->n;
7560         const PetscInt  rs   = C->rmap->rstart;
7561         const PetscInt  re   = C->rmap->rend;
7562         const PetscInt  cs   = C->cmap->rstart;
7563         for (i = 0; i < mr; i++) {
7564           const PetscInt *jj = mm->j + ii[i];
7565           const PetscInt  gr = rmap[i];
7566           const PetscInt  nz = ii[i + 1] - ii[i];
7567           if (gr < rs || gr >= re) { /* this is an offproc row */
7568             for (j = ii[i]; j < ii[i + 1]; j++) {
7569               *coi++    = gr;
7570               *idxoff++ = j;
7571             }
7572             if (!cmapt[cp]) { /* already global */
7573               for (j = 0; j < nz; j++) *coj++ = jj[j];
7574             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7575               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7576             } else { /* type-2, local to global for sparse columns */
7577               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7578             }
7579             ncoo_o += nz;
7580           } else { /* this is a local row */
7581             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7582           }
7583         }
7584       }
7585       mmdata->off[cp + 1] = idxoff;
7586       mmdata->own[cp + 1] = idxown;
7587     }
7588 
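    /* Build a star forest whose roots are the locally owned rows of C and whose leaves are this process's
       off-process entries (their destination row is given by the global indices in coo_i); the associated
       multi-SF then tells us (ncoo2) how many entries this process will receive from others */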
7589     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7590     PetscInt incoo_o;
7591     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7592     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7593     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7594     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7595     ncoo = ncoo_d + ncoo_oown + ncoo2;
7596     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7597     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7598     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7599     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7600     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7601     PetscCall(PetscFree2(coo_i, coo_j));
7602     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7603     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7604     coo_i = coo_i2;
7605     coo_j = coo_j2;
7606   } else { /* no offproc values insertion */
7607     ncoo = ncoo_d;
7608     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7609 
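    /* no off-process insertions: still create an (empty) SF, since it is passed to PetscSFMalloc() below */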
7610     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7611     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7612     PetscCall(PetscSFSetUp(mmdata->sf));
7613   }
7614   mmdata->hasoffproc = hasoffproc;
7615 
7616   /* gather (i,j) of nonzeros inserted locally */
7617   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7618     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7619     PetscInt       *coi  = coo_i + ncoo_d;
7620     PetscInt       *coj  = coo_j + ncoo_d;
7621     const PetscInt *jj   = mm->j;
7622     const PetscInt *ii   = mm->i;
7623     const PetscInt *cmap = cmapa[cp];
7624     const PetscInt *rmap = rmapa[cp];
7625     const PetscInt  mr   = mp[cp]->rmap->n;
7626     const PetscInt  rs   = C->rmap->rstart;
7627     const PetscInt  re   = C->rmap->rend;
7628     const PetscInt  cs   = C->cmap->rstart;
7629 
7630     if (mptmp[cp]) continue;
7631     if (rmapt[cp] == 1) { /* consecutive rows */
7632       /* fill coo_i */
7633       for (i = 0; i < mr; i++) {
7634         const PetscInt gr = i + rs;
7635         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7636       }
7637       /* fill coo_j */
7638       if (!cmapt[cp]) { /* type-0, already global */
7639         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7640       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7641         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7642       } else {                                            /* type-2, local to global for sparse columns */
7643         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7644       }
7645       ncoo_d += mm->nz;
7646     } else if (rmapt[cp] == 2) { /* sparse rows */
7647       for (i = 0; i < mr; i++) {
7648         const PetscInt *jj = mm->j + ii[i];
7649         const PetscInt  gr = rmap[i];
7650         const PetscInt  nz = ii[i + 1] - ii[i];
7651         if (gr >= rs && gr < re) { /* local rows */
7652           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7653           if (!cmapt[cp]) { /* type-0, already global */
7654             for (j = 0; j < nz; j++) *coj++ = jj[j];
7655           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7656             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7657           } else { /* type-2, local to global for sparse columns */
7658             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7659           }
7660           ncoo_d += nz;
7661         }
7662       }
7663     }
7664   }
7665   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7666   PetscCall(ISDestroy(&glob));
7667   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7668   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7669   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7670   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7671 
7672   /* set block sizes */
7673   A = product->A;
7674   P = product->B;
7675   switch (ptype) {
7676   case MATPRODUCT_PtAP:
7677     if (P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7678     break;
7679   case MATPRODUCT_RARt:
7680     if (P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7681     break;
7682   case MATPRODUCT_ABC:
7683     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7684     break;
7685   case MATPRODUCT_AB:
7686     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7687     break;
7688   case MATPRODUCT_AtB:
7689     if (A->cmap->bs > 1 || P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7690     break;
7691   case MATPRODUCT_ABt:
7692     if (A->rmap->bs > 1 || P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7693     break;
7694   default:
7695     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7696   }
7697 
7698   /* preallocate with COO data */
7699   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7700   PetscCall(PetscFree2(coo_i, coo_j));
7701   PetscFunctionReturn(PETSC_SUCCESS);
7702 }
7703 
7704 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7705 {
7706   Mat_Product *product = mat->product;
7707 #if defined(PETSC_HAVE_DEVICE)
7708   PetscBool match  = PETSC_FALSE;
7709   PetscBool usecpu = PETSC_FALSE;
7710 #else
7711   PetscBool match = PETSC_TRUE;
7712 #endif
7713 
7714   PetscFunctionBegin;
7715   MatCheckProduct(mat, 1);
7716 #if defined(PETSC_HAVE_DEVICE)
7717   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7718   if (match) { /* we can always fallback to the CPU if requested */
7719     switch (product->type) {
7720     case MATPRODUCT_AB:
7721       if (product->api_user) {
7722         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7723         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7724         PetscOptionsEnd();
7725       } else {
7726         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7727         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7728         PetscOptionsEnd();
7729       }
7730       break;
7731     case MATPRODUCT_AtB:
7732       if (product->api_user) {
7733         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7734         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7735         PetscOptionsEnd();
7736       } else {
7737         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7738         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7739         PetscOptionsEnd();
7740       }
7741       break;
7742     case MATPRODUCT_PtAP:
7743       if (product->api_user) {
7744         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7745         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7746         PetscOptionsEnd();
7747       } else {
7748         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7749         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7750         PetscOptionsEnd();
7751       }
7752       break;
7753     default:
7754       break;
7755     }
7756     match = (PetscBool)!usecpu;
7757   }
7758 #endif
7759   if (match) {
7760     switch (product->type) {
7761     case MATPRODUCT_AB:
7762     case MATPRODUCT_AtB:
7763     case MATPRODUCT_PtAP:
7764       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7765       break;
7766     default:
7767       break;
7768     }
7769   }
7770   /* fallback to MPIAIJ ops */
7771   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7772   PetscFunctionReturn(PETSC_SUCCESS);
7773 }
7774 
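/* Example (hypothetical command lines, assuming no options prefix): force the CPU fallback for MatPtAP
   with this backend via -matptap_backend_cpu (when MatPtAP() is called directly) or
   -mat_product_algorithm_backend_cpu (when going through the MatProduct API) */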
7775 /*
7776    Produces the set of block column indices of a matrix row, one for each block represented in the original row (assumes the row's column indices are sorted in increasing order)
7777 
7778    n - the number of block indices in cc[]
7779    cc - the block indices (must be large enough to contain the indices)
7780 */
7781 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7782 {
7783   PetscInt        cnt = -1, nidx, j;
7784   const PetscInt *idx;
7785 
7786   PetscFunctionBegin;
7787   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7788   if (nidx) {
7789     cnt     = 0;
7790     cc[cnt] = idx[0] / bs;
7791     for (j = 1; j < nidx; j++) {
7792       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7793     }
7794   }
7795   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7796   *n = cnt + 1;
7797   PetscFunctionReturn(PETSC_SUCCESS);
7798 }
7799 
7800 /*
7801     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7802 
7803     ncollapsed - the number of block indices
7804     collapsed - the block indices (must be large enough to contain the indices)
7805 */
7806 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7807 {
7808   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7809 
7810   PetscFunctionBegin;
7811   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7812   for (i = start + 1; i < start + bs; i++) {
7813     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7814     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
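    /* swap buffers so that the merged list becomes the "previous" list for the next row */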
7815     cprevtmp = cprev;
7816     cprev    = merged;
7817     merged   = cprevtmp;
7818   }
7819   *ncollapsed = nprev;
7820   if (collapsed) *collapsed = cprev;
7821   PetscFunctionReturn(PETSC_SUCCESS);
7822 }
7823 
7824 /*
7825  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7826 
7827  Input Parameters:
7828  + Amat - matrix
7829  . symmetrize - make the result symmetric
7830  - scale - scale with diagonal
7831 
7832  Output Parameter:
7833  . a_Gmat - output scalar graph whose values are all >= 0
7834 
7835 */
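/* A minimal usage sketch (hypothetical caller, not part of this file): given an assembled AIJ matrix Amat,
   build the symmetrized, diagonally scaled scalar graph and drop entries below a chosen tolerance:

     Mat Gmat;
     PetscCall(MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, 0.01, 0, NULL, &Gmat));
     // ... use Gmat, e.g. as the connectivity graph for aggregation ...
     PetscCall(MatDestroy(&Gmat));

   Passing filter < 0 skips the filtering step; a nonzero index_size with index[] restricts which rows/columns
   of each bs x bs block contribute to that block's scalar value. */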
7836 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7837 {
7838   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7839   MPI_Comm  comm;
7840   Mat       Gmat;
7841   PetscBool ismpiaij, isseqaij;
7842   Mat       a, b, c;
7843   MatType   jtype;
7844 
7845   PetscFunctionBegin;
7846   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7847   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7848   PetscCall(MatGetSize(Amat, &MM, &NN));
7849   PetscCall(MatGetBlockSize(Amat, &bs));
7850   nloc = (Iend - Istart) / bs;
7851 
7852   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7853   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7854   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7855 
7856   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7857   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class can provide a fast
7858      implementation */
7859   if (bs > 1) {
7860     PetscCall(MatGetType(Amat, &jtype));
7861     PetscCall(MatCreate(comm, &Gmat));
7862     PetscCall(MatSetType(Gmat, jtype));
7863     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7864     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7865     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
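      /* Fast path: assume every nonzero bs x bs block is dense and collapse it to a single scalar
         (a sum of absolute values); if a non-dense block is found below, revert to the slow path */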
7866       PetscInt  *d_nnz, *o_nnz;
7867       MatScalar *aa, val, *AA;
7868       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7869 
7870       if (isseqaij) {
7871         a = Amat;
7872         b = NULL;
7873       } else {
7874         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7875         a             = d->A;
7876         b             = d->B;
7877       }
7878       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7879       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7880       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7881         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7882         const PetscInt *cols1, *cols2;
7883 
7884         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7885           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7886           nnz[brow / bs] = nc2 / bs;
7887           if (nc2 % bs) ok = 0;
7888           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7889           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7890             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7891             if (nc1 != nc2) ok = 0;
7892             else {
7893               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7894                 if (cols1[jj] != cols2[jj]) ok = 0;
7895                 if (cols1[jj] % bs != jj % bs) ok = 0;
7896               }
7897             }
7898             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7899           }
7900           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7901           if (!ok) {
7902             PetscCall(PetscFree2(d_nnz, o_nnz));
7903             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7904             goto old_bs;
7905           }
7906         }
7907       }
7908       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7909       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7910       PetscCall(PetscFree2(d_nnz, o_nnz));
7911       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7912       // diag
7913       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7914         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7915 
7916         ai = aseq->i;
7917         n  = ai[brow + 1] - ai[brow];
7918         aj = aseq->j + ai[brow];
7919         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7920           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7921           val        = 0;
7922           if (index_size == 0) {
7923             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7924               aa = aseq->a + ai[brow + ii] + k;
7925               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7926                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7927               }
7928             }
7929           } else {                                            // use (index,index) value if provided
7930             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7931               PetscInt ii = index[iii];
7932               aa          = aseq->a + ai[brow + ii] + k;
7933               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7934                 PetscInt jj = index[jjj];
7935                 val += PetscAbs(PetscRealPart(aa[jj]));
7936               }
7937             }
7938           }
7939           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7940           AA[k / bs] = val;
7941         }
7942         grow = Istart / bs + brow / bs;
7943         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7944       }
7945       // off-diag
7946       if (ismpiaij) {
7947         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7948         const PetscScalar *vals;
7949         const PetscInt    *cols, *garray = aij->garray;
7950 
7951         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
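        /* garray maps local off-diagonal column indices to global columns; dividing by bs gives the global block column */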
7952         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7953           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7954           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7955             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7956             AA[k / bs] = 0;
7957             AJ[cidx]   = garray[cols[k]] / bs;
7958           }
7959           nc = ncols / bs;
7960           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7961           if (index_size == 0) {
7962             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7963               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7964               for (PetscInt k = 0; k < ncols; k += bs) {
7965                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7966                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7967                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7968                 }
7969               }
7970               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7971             }
7972           } else {                                            // use (index,index) value if provided
7973             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7974               PetscInt ii = index[iii];
7975               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7976               for (PetscInt k = 0; k < ncols; k += bs) {
7977                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7978                   PetscInt jj = index[jjj];
7979                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7980                 }
7981               }
7982               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7983             }
7984           }
7985           grow = Istart / bs + brow / bs;
7986           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7987         }
7988       }
7989       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7990       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7991       PetscCall(PetscFree2(AA, AJ));
7992     } else {
7993       const PetscScalar *vals;
7994       const PetscInt    *idx;
7995       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7996     old_bs:
7997       /*
7998        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7999        */
8000       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
8001       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
8002       if (isseqaij) {
8003         PetscInt max_d_nnz;
8004 
8005         /*
8006          Determine exact preallocation count for (sequential) scalar matrix
8007          */
8008         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
8009         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
8010         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
8011         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
8012         PetscCall(PetscFree3(w0, w1, w2));
8013       } else if (ismpiaij) {
8014         Mat             Daij, Oaij;
8015         const PetscInt *garray;
8016         PetscInt        max_d_nnz;
8017 
8018         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
8019         /*
8020          Determine exact preallocation count for diagonal block portion of scalar matrix
8021          */
8022         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
8023         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
8024         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
8025         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
8026         PetscCall(PetscFree3(w0, w1, w2));
8027         /*
8028          Overestimate (usually grossly) the preallocation count for the off-diagonal portion of the scalar matrix
8029          */
8030         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
8031           o_nnz[jj] = 0;
8032           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
8033             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8034             o_nnz[jj] += ncols;
8035             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
8036           }
8037           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
8038         }
8039       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
8040       /* get scalar copy (norms) of matrix */
8041       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
8042       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
8043       PetscCall(PetscFree2(d_nnz, o_nnz));
8044       for (Ii = Istart; Ii < Iend; Ii++) {
8045         PetscInt dest_row = Ii / bs;
8046 
8047         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
8048         for (jj = 0; jj < ncols; jj++) {
8049           PetscInt    dest_col = idx[jj] / bs;
8050           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
8051 
8052           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
8053         }
8054         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
8055       }
8056       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
8057       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
8058     }
8059   } else {
8060     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
8061     else {
8062       Gmat = Amat;
8063       PetscCall(PetscObjectReference((PetscObject)Gmat));
8064     }
8065     if (isseqaij) {
8066       a = Gmat;
8067       b = NULL;
8068     } else {
8069       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
8070       a             = d->A;
8071       b             = d->B;
8072     }
8073     if (filter >= 0 || scale) {
8074       /* take absolute value of each entry */
8075       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
8076         MatInfo      info;
8077         PetscScalar *avals;
8078 
8079         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
8080         PetscCall(MatSeqAIJGetArray(c, &avals));
8081         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
8082         PetscCall(MatSeqAIJRestoreArray(c, &avals));
8083       }
8084     }
8085   }
8086   if (symmetrize) {
8087     PetscBool isset, issym;
8088 
8089     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8090     if (!isset || !issym) {
8091       Mat matTrans;
8092 
8093       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8094       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8095       PetscCall(MatDestroy(&matTrans));
8096     }
8097     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8098   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8099   if (scale) {
8100     /* symmetrically scale Gmat so that all of its diagonal values become 1 or -1 */
8101     Vec diag;
8102 
8103     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8104     PetscCall(MatGetDiagonal(Gmat, diag));
8105     PetscCall(VecReciprocal(diag));
8106     PetscCall(VecSqrtAbs(diag));
8107     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8108     PetscCall(VecDestroy(&diag));
8109   }
8110   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8111   if (filter >= 0) {
8112     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8113     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8114   }
8115   *a_Gmat = Gmat;
8116   PetscFunctionReturn(PETSC_SUCCESS);
8117 }
8118 
8119 /*
8120     Special version for direct calls from Fortran
8121 */
8122 
8123 /* Change these macros so they can be used in a void function */
8124 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8125 #undef PetscCall
8126 #define PetscCall(...) \
8127   do { \
8128     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8129     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8130       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8131       return; \
8132     } \
8133   } while (0)
8134 
8135 #undef SETERRQ
8136 #define SETERRQ(comm, ierr, ...) \
8137   do { \
8138     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8139     return; \
8140   } while (0)
8141 
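/* map the C routine name to the symbol naming convention expected by the Fortran compiler */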
8142 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8143   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8144 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8145   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8146 #else
8147 #endif
8148 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8149 {
8150   Mat         mat = *mmat;
8151   PetscInt    m = *mm, n = *mn;
8152   InsertMode  addv = *maddv;
8153   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8154   PetscScalar value;
8155 
8156   MatCheckPreallocated(mat, 1);
8157   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8158   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8159   {
8160     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8161     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8162     PetscBool roworiented = aij->roworiented;
8163 
8164     /* Some Variables required in the macro */
8165     Mat         A     = aij->A;
8166     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8167     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8168     MatScalar  *aa;
8169     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8170     Mat         B                 = aij->B;
8171     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8172     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8173     MatScalar  *ba;
8174     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8175      * cannot use "#if defined" inside a macro. */
8176     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8177 
8178     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8179     PetscInt   nonew = a->nonew;
8180     MatScalar *ap1, *ap2;
8181 
8182     PetscFunctionBegin;
8183     PetscCall(MatSeqAIJGetArray(A, &aa));
8184     PetscCall(MatSeqAIJGetArray(B, &ba));
8185     for (i = 0; i < m; i++) {
8186       if (im[i] < 0) continue;
8187       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8188       if (im[i] >= rstart && im[i] < rend) {
8189         row      = im[i] - rstart;
8190         lastcol1 = -1;
8191         rp1      = aj + ai[row];
8192         ap1      = aa + ai[row];
8193         rmax1    = aimax[row];
8194         nrow1    = ailen[row];
8195         low1     = 0;
8196         high1    = nrow1;
8197         lastcol2 = -1;
8198         rp2      = bj + bi[row];
8199         ap2      = ba + bi[row];
8200         rmax2    = bimax[row];
8201         nrow2    = bilen[row];
8202         low2     = 0;
8203         high2    = nrow2;
8204 
8205         for (j = 0; j < n; j++) {
8206           if (roworiented) value = v[i * n + j];
8207           else value = v[i + j * m];
8208           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8209           if (in[j] >= cstart && in[j] < cend) {
8210             col = in[j] - cstart;
8211             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8212           } else if (in[j] < 0) continue;
8213           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8214             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8215           } else {
8216             if (mat->was_assembled) {
8217               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8218 #if defined(PETSC_USE_CTABLE)
8219               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8220               col--;
8221 #else
8222               col = aij->colmap[in[j]] - 1;
8223 #endif
8224               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8225                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8226                 col = in[j];
8227                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8228                 B        = aij->B;
8229                 b        = (Mat_SeqAIJ *)B->data;
8230                 bimax    = b->imax;
8231                 bi       = b->i;
8232                 bilen    = b->ilen;
8233                 bj       = b->j;
8234                 rp2      = bj + bi[row];
8235                 ap2      = ba + bi[row];
8236                 rmax2    = bimax[row];
8237                 nrow2    = bilen[row];
8238                 low2     = 0;
8239                 high2    = nrow2;
8240                 bm       = aij->B->rmap->n;
8241                 ba       = b->a;
8242                 inserted = PETSC_FALSE;
8243               }
8244             } else col = in[j];
8245             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8246           }
8247         }
8248       } else if (!aij->donotstash) {
8249         if (roworiented) {
8250           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8251         } else {
8252           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8253         }
8254       }
8255     }
8256     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8257     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8258   }
8259   PetscFunctionReturnVoid();
8260 }
8261 
8262 /* Undefine these macros here since they were redefined above from their original definitions! No
8263  * other PETSc functions should be defined past this point, as it is impossible to recover the
8264  * original definitions */
8265 #undef PetscCall
8266 #undef SETERRQ
8267