xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision e64794e4e0b181328b5d79973a03ee824bee75ee)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
10 #define TYPE AIJ
11 #define TYPE_AIJ
12 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
13 #undef TYPE
14 #undef TYPE_AIJ
15 
16 static PetscErrorCode MatReset_MPIAIJ(Mat mat)
17 {
18   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
19 
20   PetscFunctionBegin;
21   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
22   PetscCall(MatStashDestroy_Private(&mat->stash));
23   PetscCall(VecDestroy(&aij->diag));
24   PetscCall(MatDestroy(&aij->A));
25   PetscCall(MatDestroy(&aij->B));
26 #if defined(PETSC_USE_CTABLE)
27   PetscCall(PetscHMapIDestroy(&aij->colmap));
28 #else
29   PetscCall(PetscFree(aij->colmap));
30 #endif
31   PetscCall(PetscFree(aij->garray));
32   PetscCall(VecDestroy(&aij->lvec));
33   PetscCall(VecScatterDestroy(&aij->Mvctx));
34   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
35   PetscCall(PetscFree(aij->ld));
36   PetscFunctionReturn(PETSC_SUCCESS);
37 }
38 
39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
40 {
41   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
42   /* Save the nonzero states of the component matrices because those are what are used to determine
43     the nonzero state of mat */
44   PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;
45 
46   PetscFunctionBegin;
47   PetscCall(MatReset_MPIAIJ(mat));
48   PetscCall(MatSetUp_MPI_Hash(mat));
49   aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
50   PetscFunctionReturn(PETSC_SUCCESS);
51 }
52 
53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
54 {
55   PetscFunctionBegin;
56   PetscCall(MatReset_MPIAIJ(mat));
57 
58   PetscCall(PetscFree(mat->data));
59 
60   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
61   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
62 
63   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
65   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
66   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
69   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
74 #if defined(PETSC_HAVE_CUDA)
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
76 #endif
77 #if defined(PETSC_HAVE_HIP)
78   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
79 #endif
80 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
82 #endif
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
84 #if defined(PETSC_HAVE_ELEMENTAL)
85   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
86 #endif
87 #if defined(PETSC_HAVE_SCALAPACK)
88   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
89 #endif
90 #if defined(PETSC_HAVE_HYPRE)
91   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
92   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
93 #endif
94   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
95   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
96   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
97   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
98   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
99   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
100 #if defined(PETSC_HAVE_MKL_SPARSE)
101   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
102 #endif
103   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
104   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
105   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
106   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
107   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
108   PetscFunctionReturn(PETSC_SUCCESS);
109 }
110 
111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
112 {
113   Mat B;
114 
115   PetscFunctionBegin;
116   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
117   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
118   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
119   PetscCall(MatDestroy(&B));
120   PetscFunctionReturn(PETSC_SUCCESS);
121 }
122 
123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
124 {
125   Mat B;
126 
127   PetscFunctionBegin;
128   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
129   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
130   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
131   PetscFunctionReturn(PETSC_SUCCESS);
132 }
133 
134 /*MC
135    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
136 
137    This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
138    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
139   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
140   for communicators controlling multiple processes.  It is recommended that you call both of
141   the above preallocation routines for simplicity.
142 
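  Example Usage:
  A minimal sketch (the sizes and nonzero estimates below are placeholders chosen only for illustration); calling both
  preallocation routines is harmless since only the one matching the communicator size takes effect.
.vb
  Mat A;

  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
  PetscCall(MatSetType(A, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          /* used on a single-process communicator */
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); /* used on a multi-process communicator */
.ve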
143    Options Database Key:
144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
145 
146   Level: beginner
147 
148   Developer Note:
149   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the matrix type also
150   automatically switches over to use inodes when enough exist.
151 
152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
153 M*/
154 
155 /*MC
156    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
157 
158    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
159    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
160    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
161   for communicators controlling multiple processes.  It is recommended that you call both of
162   the above preallocation routines for simplicity.
163 
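  Example Usage:
  A minimal sketch (sizes are placeholders); the type can equally be selected at run time with -mat_type aijcrl
  and `MatSetFromOptions()`.
.vb
  Mat A;

  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
  PetscCall(MatSetType(A, MATAIJCRL));
.ve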
164    Options Database Key:
165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
166 
167   Level: beginner
168 
169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
170 M*/
171 
172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
173 {
174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
175 
176   PetscFunctionBegin;
177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
178   A->boundtocpu = flg;
179 #endif
180   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
181   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
182 
183   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
184    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
185    * to differ from the parent matrix. */
186   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
187   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
188   PetscFunctionReturn(PETSC_SUCCESS);
189 }
190 
191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
192 {
193   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
194 
195   PetscFunctionBegin;
196   if (mat->A) {
197     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
198     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
199   }
200   PetscFunctionReturn(PETSC_SUCCESS);
201 }
202 
203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
204 {
205   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
206   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
207   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
208   const PetscInt  *ia, *ib;
209   const MatScalar *aa, *bb, *aav, *bav;
210   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
211   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
212 
213   PetscFunctionBegin;
214   *keptrows = NULL;
215 
216   ia = a->i;
217   ib = b->i;
218   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
219   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
220   for (i = 0; i < m; i++) {
221     na = ia[i + 1] - ia[i];
222     nb = ib[i + 1] - ib[i];
223     if (!na && !nb) {
224       cnt++;
225       goto ok1;
226     }
227     aa = aav + ia[i];
228     for (j = 0; j < na; j++) {
229       if (aa[j] != 0.0) goto ok1;
230     }
231     bb = PetscSafePointerPlusOffset(bav, ib[i]);
232     for (j = 0; j < nb; j++) {
233       if (bb[j] != 0.0) goto ok1;
234     }
235     cnt++;
236   ok1:;
237   }
238   PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
239   if (!n0rows) {
240     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
241     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
242     PetscFunctionReturn(PETSC_SUCCESS);
243   }
244   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
245   cnt = 0;
246   for (i = 0; i < m; i++) {
247     na = ia[i + 1] - ia[i];
248     nb = ib[i + 1] - ib[i];
249     if (!na && !nb) continue;
250     aa = aav + ia[i];
251     for (j = 0; j < na; j++) {
252       if (aa[j] != 0.0) {
253         rows[cnt++] = rstart + i;
254         goto ok2;
255       }
256     }
257     bb = PetscSafePointerPlusOffset(bav, ib[i]);
258     for (j = 0; j < nb; j++) {
259       if (bb[j] != 0.0) {
260         rows[cnt++] = rstart + i;
261         goto ok2;
262       }
263     }
264   ok2:;
265   }
266   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
267   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
268   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
269   PetscFunctionReturn(PETSC_SUCCESS);
270 }
271 
272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
273 {
274   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
275   PetscBool   cong;
276 
277   PetscFunctionBegin;
278   PetscCall(MatHasCongruentLayouts(Y, &cong));
279   if (Y->assembled && cong) {
280     PetscCall(MatDiagonalSet(aij->A, D, is));
281   } else {
282     PetscCall(MatDiagonalSet_Default(Y, D, is));
283   }
284   PetscFunctionReturn(PETSC_SUCCESS);
285 }
286 
287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
288 {
289   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
290   PetscInt    i, rstart, nrows, *rows;
291 
292   PetscFunctionBegin;
293   *zrows = NULL;
294   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
295   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
296   for (i = 0; i < nrows; i++) rows[i] += rstart;
297   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
302 {
303   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
304   PetscInt           i, m, n, *garray = aij->garray;
305   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
306   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
307   PetscReal         *work;
308   const PetscScalar *dummy;
309 
310   PetscFunctionBegin;
311   PetscCall(MatGetSize(A, &m, &n));
312   PetscCall(PetscCalloc1(n, &work));
313   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
314   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
315   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
316   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
317   if (type == NORM_2) {
318     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
319     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
320   } else if (type == NORM_1) {
321     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
322     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
323   } else if (type == NORM_INFINITY) {
324     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
325     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
326   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
327     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
328     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
329   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
330     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
331     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
332   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
333   if (type == NORM_INFINITY) {
334     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
335   } else {
336     PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
337   }
338   PetscCall(PetscFree(work));
339   if (type == NORM_2) {
340     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
341   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
342     for (i = 0; i < n; i++) reductions[i] /= m;
343   }
344   PetscFunctionReturn(PETSC_SUCCESS);
345 }
346 
347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
348 {
349   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
350   IS              sis, gis;
351   const PetscInt *isis, *igis;
352   PetscInt        n, *iis, nsis, ngis, rstart, i;
353 
354   PetscFunctionBegin;
355   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
356   PetscCall(MatFindNonzeroRows(a->B, &gis));
357   PetscCall(ISGetSize(gis, &ngis));
358   PetscCall(ISGetSize(sis, &nsis));
359   PetscCall(ISGetIndices(sis, &isis));
360   PetscCall(ISGetIndices(gis, &igis));
361 
362   PetscCall(PetscMalloc1(ngis + nsis, &iis));
363   PetscCall(PetscArraycpy(iis, igis, ngis));
364   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
365   n = ngis + nsis;
366   PetscCall(PetscSortRemoveDupsInt(&n, iis));
367   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
368   for (i = 0; i < n; i++) iis[i] += rstart;
369   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
370 
371   PetscCall(ISRestoreIndices(sis, &isis));
372   PetscCall(ISRestoreIndices(gis, &igis));
373   PetscCall(ISDestroy(&sis));
374   PetscCall(ISDestroy(&gis));
375   PetscFunctionReturn(PETSC_SUCCESS);
376 }
377 
378 /*
379   Local utility routine that creates a mapping from the global column
380 number to the local number in the off-diagonal part of the local
381 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
382 a slightly higher hash-table lookup cost; without it, it is not scalable (each
383 process stores an order-N integer array), but access is fast.
384 */
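/*
  For reference, a sketch of how the colmap created here is consumed elsewhere in this file
  (gcol and lcol are illustrative names for a global and a local column index; values are stored
  shifted by one so that 0 can mean "not present", hence lcol < 0 after the lookup means gcol has
  no entry in the off-diagonal block B):

    #if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
      lcol--;
    #else
      lcol = aij->colmap[gcol] - 1;
    #endif
*/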
385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
386 {
387   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
388   PetscInt    n   = aij->B->cmap->n, i;
389 
390   PetscFunctionBegin;
391   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
392 #if defined(PETSC_USE_CTABLE)
393   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
394   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
395 #else
396   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
397   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
398 #endif
399   PetscFunctionReturn(PETSC_SUCCESS);
400 }
401 
402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
403   do { \
404     if (col <= lastcol1) low1 = 0; \
405     else high1 = nrow1; \
406     lastcol1 = col; \
407     while (high1 - low1 > 5) { \
408       t = (low1 + high1) / 2; \
409       if (rp1[t] > col) high1 = t; \
410       else low1 = t; \
411     } \
412     for (_i = low1; _i < high1; _i++) { \
413       if (rp1[_i] > col) break; \
414       if (rp1[_i] == col) { \
415         if (addv == ADD_VALUES) { \
416           ap1[_i] += value; \
417           /* Not sure LogFlops will slow down the code or not */ \
418           (void)PetscLogFlops(1.0); \
419         } else ap1[_i] = value; \
420         goto a_noinsert; \
421       } \
422     } \
423     if (value == 0.0 && ignorezeroentries && row != col) { \
424       low1  = 0; \
425       high1 = nrow1; \
426       goto a_noinsert; \
427     } \
428     if (nonew == 1) { \
429       low1  = 0; \
430       high1 = nrow1; \
431       goto a_noinsert; \
432     } \
433     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
434     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
435     N = nrow1++ - 1; \
436     a->nz++; \
437     high1++; \
438     /* shift up all the later entries in this row */ \
439     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
440     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
441     rp1[_i] = col; \
442     ap1[_i] = value; \
443   a_noinsert:; \
444     ailen[row] = nrow1; \
445   } while (0)
446 
447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
448   do { \
449     if (col <= lastcol2) low2 = 0; \
450     else high2 = nrow2; \
451     lastcol2 = col; \
452     while (high2 - low2 > 5) { \
453       t = (low2 + high2) / 2; \
454       if (rp2[t] > col) high2 = t; \
455       else low2 = t; \
456     } \
457     for (_i = low2; _i < high2; _i++) { \
458       if (rp2[_i] > col) break; \
459       if (rp2[_i] == col) { \
460         if (addv == ADD_VALUES) { \
461           ap2[_i] += value; \
462           (void)PetscLogFlops(1.0); \
463         } else ap2[_i] = value; \
464         goto b_noinsert; \
465       } \
466     } \
467     if (value == 0.0 && ignorezeroentries) { \
468       low2  = 0; \
469       high2 = nrow2; \
470       goto b_noinsert; \
471     } \
472     if (nonew == 1) { \
473       low2  = 0; \
474       high2 = nrow2; \
475       goto b_noinsert; \
476     } \
477     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
478     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
479     N = nrow2++ - 1; \
480     b->nz++; \
481     high2++; \
482     /* shift up all the later entries in this row */ \
483     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
484     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
485     rp2[_i] = col; \
486     ap2[_i] = value; \
487   b_noinsert:; \
488     bilen[row] = nrow2; \
489   } while (0)
490 
491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
492 {
493   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
494   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
495   PetscInt     l, *garray                         = mat->garray, diag;
496   PetscScalar *aa, *ba;
497 
498   PetscFunctionBegin;
499   /* code only works for square matrices A */
500 
501   /* find size of row to the left of the diagonal part */
502   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
503   row = row - diag;
504   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
505     if (garray[b->j[b->i[row] + l]] > diag) break;
506   }
507   if (l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512 
513   /* diagonal part */
514   if (a->i[row + 1] - a->i[row]) {
515     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
516     PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
517     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
518   }
519 
520   /* right of diagonal part */
521   if (b->i[row + 1] - b->i[row] - l) {
522     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
523     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
524     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
525   }
526   PetscFunctionReturn(PETSC_SUCCESS);
527 }
528 
529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
530 {
531   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
532   PetscScalar value = 0.0;
533   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
534   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
535   PetscBool   roworiented = aij->roworiented;
536 
537   /* Some Variables required in the macro */
538   Mat         A     = aij->A;
539   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
540   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
541   PetscBool   ignorezeroentries = a->ignorezeroentries;
542   Mat         B                 = aij->B;
543   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
544   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
545   MatScalar  *aa, *ba;
546   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
547   PetscInt    nonew;
548   MatScalar  *ap1, *ap2;
549 
550   PetscFunctionBegin;
551   PetscCall(MatSeqAIJGetArray(A, &aa));
552   PetscCall(MatSeqAIJGetArray(B, &ba));
553   for (i = 0; i < m; i++) {
554     if (im[i] < 0) continue;
555     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
556     if (im[i] >= rstart && im[i] < rend) {
557       row      = im[i] - rstart;
558       lastcol1 = -1;
559       rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
560       ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
561       rmax1    = aimax[row];
562       nrow1    = ailen[row];
563       low1     = 0;
564       high1    = nrow1;
565       lastcol2 = -1;
566       rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
567       ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
568       rmax2    = bimax[row];
569       nrow2    = bilen[row];
570       low2     = 0;
571       high2    = nrow2;
572 
573       for (j = 0; j < n; j++) {
574         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
575         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
576         if (in[j] >= cstart && in[j] < cend) {
577           col   = in[j] - cstart;
578           nonew = a->nonew;
579           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
580         } else if (in[j] < 0) {
581           continue;
582         } else {
583           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
584           if (mat->was_assembled) {
585             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
586 #if defined(PETSC_USE_CTABLE)
587             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
588             col--;
589 #else
590             col = aij->colmap[in[j]] - 1;
591 #endif
592             if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
593               PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
594               col = in[j];
595               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
596               B     = aij->B;
597               b     = (Mat_SeqAIJ *)B->data;
598               bimax = b->imax;
599               bi    = b->i;
600               bilen = b->ilen;
601               bj    = b->j;
602               ba    = b->a;
603               rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
604               ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
605               rmax2 = bimax[row];
606               nrow2 = bilen[row];
607               low2  = 0;
608               high2 = nrow2;
609               bm    = aij->B->rmap->n;
610               ba    = b->a;
611             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
612               PetscCheck(1 == ((Mat_SeqAIJ *)aij->B->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
613               PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
614             }
615           } else col = in[j];
616           nonew = b->nonew;
617           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
618         }
619       }
620     } else {
621       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
622       if (!aij->donotstash) {
623         mat->assembled = PETSC_FALSE;
624         if (roworiented) {
625           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
626         } else {
627           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
628         }
629       }
630     }
631   }
632   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
633   PetscCall(MatSeqAIJRestoreArray(B, &ba));
634   PetscFunctionReturn(PETSC_SUCCESS);
635 }
636 
637 /*
638     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
639     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
640     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
641 */
642 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
643 {
644   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
645   Mat         A      = aij->A; /* diagonal part of the matrix */
646   Mat         B      = aij->B; /* off-diagonal part of the matrix */
647   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
648   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
649   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
650   PetscInt   *ailen = a->ilen, *aj = a->j;
651   PetscInt   *bilen = b->ilen, *bj = b->j;
652   PetscInt    am          = aij->A->rmap->n, j;
653   PetscInt    diag_so_far = 0, dnz;
654   PetscInt    offd_so_far = 0, onz;
655 
656   PetscFunctionBegin;
657   /* Iterate over all rows of the matrix */
658   for (j = 0; j < am; j++) {
659     dnz = onz = 0;
660     /*  Iterate over all non-zero columns of the current row */
661     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
662       /* If column is in the diagonal */
663       if (mat_j[col] >= cstart && mat_j[col] < cend) {
664         aj[diag_so_far++] = mat_j[col] - cstart;
665         dnz++;
666       } else { /* off-diagonal entries */
667         bj[offd_so_far++] = mat_j[col];
668         onz++;
669       }
670     }
671     ailen[j] = dnz;
672     bilen[j] = onz;
673   }
674   PetscFunctionReturn(PETSC_SUCCESS);
675 }
676 
677 /*
678     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
679     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
680     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
681     Also, mat->was_assembled has to be false, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
682     would not be correct and the more complex MatSetValues_MPIAIJ() has to be used.
683 */
684 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
685 {
686   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
687   Mat          A    = aij->A; /* diagonal part of the matrix */
688   Mat          B    = aij->B; /* off-diagonal part of the matrix */
689   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
690   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
691   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
692   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
693   PetscInt    *ailen = a->ilen, *aj = a->j;
694   PetscInt    *bilen = b->ilen, *bj = b->j;
695   PetscInt     am          = aij->A->rmap->n, j;
696   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
697   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
698   PetscScalar *aa = a->a, *ba = b->a;
699 
700   PetscFunctionBegin;
701   /* Iterate over all rows of the matrix */
702   for (j = 0; j < am; j++) {
703     dnz_row = onz_row = 0;
704     rowstart_offd     = full_offd_i[j];
705     rowstart_diag     = full_diag_i[j];
706     /*  Iterate over all non-zero columns of the current row */
707     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
708       /* If column is in the diagonal */
709       if (mat_j[col] >= cstart && mat_j[col] < cend) {
710         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
711         aa[rowstart_diag + dnz_row] = mat_a[col];
712         dnz_row++;
713       } else { /* off-diagonal entries */
714         bj[rowstart_offd + onz_row] = mat_j[col];
715         ba[rowstart_offd + onz_row] = mat_a[col];
716         onz_row++;
717       }
718     }
719     ailen[j] = dnz_row;
720     bilen[j] = onz_row;
721   }
722   PetscFunctionReturn(PETSC_SUCCESS);
723 }
724 
725 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
728   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
729   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
730 
731   PetscFunctionBegin;
732   for (i = 0; i < m; i++) {
733     if (idxm[i] < 0) continue; /* negative row */
734     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
735     PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
736     row = idxm[i] - rstart;
737     for (j = 0; j < n; j++) {
738       if (idxn[j] < 0) continue; /* negative column */
739       PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
740       if (idxn[j] >= cstart && idxn[j] < cend) {
741         col = idxn[j] - cstart;
742         PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
743       } else {
744         if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
745 #if defined(PETSC_USE_CTABLE)
746         PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
747         col--;
748 #else
749         col = aij->colmap[idxn[j]] - 1;
750 #endif
751         if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
752         else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
753       }
754     }
755   }
756   PetscFunctionReturn(PETSC_SUCCESS);
757 }
758 
759 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
760 {
761   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
762   PetscInt    nstash, reallocs;
763 
764   PetscFunctionBegin;
765   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
766 
767   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
768   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
769   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
770   PetscFunctionReturn(PETSC_SUCCESS);
771 }
772 
773 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
774 {
775   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
776   PetscMPIInt  n;
777   PetscInt     i, j, rstart, ncols, flg;
778   PetscInt    *row, *col;
779   PetscBool    all_assembled;
780   PetscScalar *val;
781 
782   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
783 
784   PetscFunctionBegin;
785   if (!aij->donotstash && !mat->nooffprocentries) {
786     while (1) {
787       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
788       if (!flg) break;
789 
790       for (i = 0; i < n;) {
791         /* Now identify the consecutive vals belonging to the same row */
792         for (j = i, rstart = row[j]; j < n; j++) {
793           if (row[j] != rstart) break;
794         }
795         if (j < n) ncols = j - i;
796         else ncols = n - i;
797         /* Now assemble all these values with a single function call */
798         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
799         i = j;
800       }
801     }
802     PetscCall(MatStashScatterEnd_Private(&mat->stash));
803   }
804 #if defined(PETSC_HAVE_DEVICE)
805   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
806   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
807   if (mat->boundtocpu) {
808     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
809     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
810   }
811 #endif
812   PetscCall(MatAssemblyBegin(aij->A, mode));
813   PetscCall(MatAssemblyEnd(aij->A, mode));
814 
815   /* determine whether any process has disassembled; if so, we must
816      also disassemble ourselves so that we may reassemble. */
817   /*
818      if the nonzero structure of the submatrix B cannot change, then we know that
819      no process has disassembled and thus we can skip this step
820   */
821   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
822     PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
823     if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
824       PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
825     }
826   }
827   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
828   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
829 #if defined(PETSC_HAVE_DEVICE)
830   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
831 #endif
832   PetscCall(MatAssemblyBegin(aij->B, mode));
833   PetscCall(MatAssemblyEnd(aij->B, mode));
834 
835   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
836 
837   aij->rowvalues = NULL;
838 
839   PetscCall(VecDestroy(&aij->diag));
840 
841   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
842   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
843     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
844     PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
845   }
846 #if defined(PETSC_HAVE_DEVICE)
847   mat->offloadmask = PETSC_OFFLOAD_BOTH;
848 #endif
849   PetscFunctionReturn(PETSC_SUCCESS);
850 }
851 
852 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
853 {
854   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
855 
856   PetscFunctionBegin;
857   PetscCall(MatZeroEntries(l->A));
858   PetscCall(MatZeroEntries(l->B));
859   PetscFunctionReturn(PETSC_SUCCESS);
860 }
861 
862 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
863 {
864   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
865   PetscInt   *lrows;
866   PetscInt    r, len;
867   PetscBool   cong;
868 
869   PetscFunctionBegin;
870   /* get locally owned rows */
871   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
872   PetscCall(MatHasCongruentLayouts(A, &cong));
873   /* fix right-hand side if needed */
874   if (x && b) {
875     const PetscScalar *xx;
876     PetscScalar       *bb;
877 
878     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
879     PetscCall(VecGetArrayRead(x, &xx));
880     PetscCall(VecGetArray(b, &bb));
881     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
882     PetscCall(VecRestoreArrayRead(x, &xx));
883     PetscCall(VecRestoreArray(b, &bb));
884   }
885 
886   if (diag != 0.0 && cong) {
887     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
888     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
889   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow new insertions */
890     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
891     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
892     PetscInt    nnwA, nnwB;
893     PetscBool   nnzA, nnzB;
894 
895     nnwA = aijA->nonew;
896     nnwB = aijB->nonew;
897     nnzA = aijA->keepnonzeropattern;
898     nnzB = aijB->keepnonzeropattern;
899     if (!nnzA) {
900       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
901       aijA->nonew = 0;
902     }
903     if (!nnzB) {
904       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
905       aijB->nonew = 0;
906     }
907     /* Must zero here before the next loop */
908     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
909     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
910     for (r = 0; r < len; ++r) {
911       const PetscInt row = lrows[r] + A->rmap->rstart;
912       if (row >= A->cmap->N) continue;
913       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
914     }
915     aijA->nonew = nnwA;
916     aijB->nonew = nnwB;
917   } else {
918     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
919     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
920   }
921   PetscCall(PetscFree(lrows));
922   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
923   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
924 
925   /* only change matrix nonzero state if pattern was allowed to be changed */
926   if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
927     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
928     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
929   }
930   PetscFunctionReturn(PETSC_SUCCESS);
931 }
932 
933 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
934 {
935   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
936   PetscInt           n = A->rmap->n;
937   PetscInt           i, j, r, m, len = 0;
938   PetscInt          *lrows, *owners = A->rmap->range;
939   PetscMPIInt        p = 0;
940   PetscSFNode       *rrows;
941   PetscSF            sf;
942   const PetscScalar *xx;
943   PetscScalar       *bb, *mask, *aij_a;
944   Vec                xmask, lmask;
945   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
946   const PetscInt    *aj, *ii, *ridx;
947   PetscScalar       *aa;
948 
949   PetscFunctionBegin;
950   /* Create SF where leaves are input rows and roots are owned rows */
951   PetscCall(PetscMalloc1(n, &lrows));
952   for (r = 0; r < n; ++r) lrows[r] = -1;
953   PetscCall(PetscMalloc1(N, &rrows));
954   for (r = 0; r < N; ++r) {
955     const PetscInt idx = rows[r];
956     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
957     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
958       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
959     }
960     rrows[r].rank  = p;
961     rrows[r].index = rows[r] - owners[p];
962   }
963   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
964   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
965   /* Collect flags for rows to be zeroed */
966   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
967   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
968   PetscCall(PetscSFDestroy(&sf));
969   /* Compress and put in row numbers */
970   for (r = 0; r < n; ++r)
971     if (lrows[r] >= 0) lrows[len++] = r;
972   /* zero diagonal part of matrix */
973   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
974   /* handle off-diagonal part of matrix */
975   PetscCall(MatCreateVecs(A, &xmask, NULL));
976   PetscCall(VecDuplicate(l->lvec, &lmask));
977   PetscCall(VecGetArray(xmask, &bb));
978   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
979   PetscCall(VecRestoreArray(xmask, &bb));
980   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
981   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
982   PetscCall(VecDestroy(&xmask));
983   if (x && b) { /* this code is buggy when the row and column layout don't match */
984     PetscBool cong;
985 
986     PetscCall(MatHasCongruentLayouts(A, &cong));
987     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
988     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
989     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
990     PetscCall(VecGetArrayRead(l->lvec, &xx));
991     PetscCall(VecGetArray(b, &bb));
992   }
993   PetscCall(VecGetArray(lmask, &mask));
994   /* remove zeroed rows of off-diagonal matrix */
995   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
996   ii = aij->i;
997   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
998   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
999   if (aij->compressedrow.use) {
1000     m    = aij->compressedrow.nrows;
1001     ii   = aij->compressedrow.i;
1002     ridx = aij->compressedrow.rindex;
1003     for (i = 0; i < m; i++) {
1004       n  = ii[i + 1] - ii[i];
1005       aj = aij->j + ii[i];
1006       aa = aij_a + ii[i];
1007 
1008       for (j = 0; j < n; j++) {
1009         if (PetscAbsScalar(mask[*aj])) {
1010           if (b) bb[*ridx] -= *aa * xx[*aj];
1011           *aa = 0.0;
1012         }
1013         aa++;
1014         aj++;
1015       }
1016       ridx++;
1017     }
1018   } else { /* do not use compressed row format */
1019     m = l->B->rmap->n;
1020     for (i = 0; i < m; i++) {
1021       n  = ii[i + 1] - ii[i];
1022       aj = aij->j + ii[i];
1023       aa = aij_a + ii[i];
1024       for (j = 0; j < n; j++) {
1025         if (PetscAbsScalar(mask[*aj])) {
1026           if (b) bb[i] -= *aa * xx[*aj];
1027           *aa = 0.0;
1028         }
1029         aa++;
1030         aj++;
1031       }
1032     }
1033   }
1034   if (x && b) {
1035     PetscCall(VecRestoreArray(b, &bb));
1036     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1037   }
1038   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1039   PetscCall(VecRestoreArray(lmask, &mask));
1040   PetscCall(VecDestroy(&lmask));
1041   PetscCall(PetscFree(lrows));
1042 
1043   /* only change matrix nonzero state if pattern was allowed to be changed */
1044   if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
1045     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1046     PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1047   }
1048   PetscFunctionReturn(PETSC_SUCCESS);
1049 }
1050 
1051 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1052 {
1053   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1054   PetscInt    nt;
1055   VecScatter  Mvctx = a->Mvctx;
1056 
1057   PetscFunctionBegin;
1058   PetscCall(VecGetLocalSize(xx, &nt));
1059   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1060   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1061   PetscUseTypeMethod(a->A, mult, xx, yy);
1062   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1063   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1068 {
1069   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1070 
1071   PetscFunctionBegin;
1072   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1073   PetscFunctionReturn(PETSC_SUCCESS);
1074 }
1075 
1076 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1077 {
1078   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1079   VecScatter  Mvctx = a->Mvctx;
1080 
1081   PetscFunctionBegin;
1082   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1083   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1084   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1085   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087 }
1088 
1089 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1090 {
1091   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1092 
1093   PetscFunctionBegin;
1094   /* do nondiagonal part */
1095   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1096   /* do local part */
1097   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1098   /* add partial results together */
1099   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1100   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1101   PetscFunctionReturn(PETSC_SUCCESS);
1102 }
1103 
1104 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1105 {
1106   MPI_Comm    comm;
1107   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1108   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1109   IS          Me, Notme;
1110   PetscInt    M, N, first, last, *notme, i;
1111   PetscBool   lf;
1112   PetscMPIInt size;
1113 
1114   PetscFunctionBegin;
1115   /* Easy test: symmetric diagonal block */
1116   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1117   PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1118   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1119   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1120   PetscCallMPI(MPI_Comm_size(comm, &size));
1121   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1122 
1123   /* Hard test: off-diagonal block. This requires a call to MatCreateSubMatrices(). */
1124   PetscCall(MatGetSize(Amat, &M, &N));
1125   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1126   PetscCall(PetscMalloc1(N - last + first, &notme));
1127   for (i = 0; i < first; i++) notme[i] = i;
1128   for (i = last; i < M; i++) notme[i - last + first] = i;
1129   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1130   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1131   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1132   Aoff = Aoffs[0];
1133   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1134   Boff = Boffs[0];
1135   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1136   PetscCall(MatDestroyMatrices(1, &Aoffs));
1137   PetscCall(MatDestroyMatrices(1, &Boffs));
1138   PetscCall(ISDestroy(&Me));
1139   PetscCall(ISDestroy(&Notme));
1140   PetscCall(PetscFree(notme));
1141   PetscFunctionReturn(PETSC_SUCCESS);
1142 }
1143 
1144 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   /* do nondiagonal part */
1150   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1151   /* do local part */
1152   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1153   /* add partial results together */
1154   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1155   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
1159 /*
1160   This only works correctly for square matrices where the subblock A->A is the
1161    diagonal block
1162 */
1163 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1164 {
1165   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1169   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1170   PetscCall(MatGetDiagonal(a->A, v));
1171   PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173 
1174 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1175 {
1176   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatScale(a->A, aa));
1180   PetscCall(MatScale(a->B, aa));
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1185 {
1186   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1187   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1188   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1189   const PetscInt    *garray = aij->garray;
1190   const PetscScalar *aa, *ba;
1191   PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
1192   PetscInt64         nz, hnz;
1193   PetscInt          *rowlens;
1194   PetscInt          *colidxs;
1195   PetscScalar       *matvals;
1196   PetscMPIInt        rank;
1197 
1198   PetscFunctionBegin;
1199   PetscCall(PetscViewerSetUp(viewer));
1200 
1201   M  = mat->rmap->N;
1202   N  = mat->cmap->N;
1203   m  = mat->rmap->n;
1204   rs = mat->rmap->rstart;
1205   cs = mat->cmap->rstart;
1206   nz = A->nz + B->nz;
1207 
1208   /* write matrix header */
1209   header[0] = MAT_FILE_CLASSID;
1210   header[1] = M;
1211   header[2] = N;
1212   PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1213   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1214   if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
1215   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1216 
1217   /* fill in and store row lengths  */
1218   PetscCall(PetscMalloc1(m, &rowlens));
1219   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1220   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1221   PetscCall(PetscFree(rowlens));
1222 
1223   /* fill in and store column indices */
1224   PetscCall(PetscMalloc1(nz, &colidxs));
1225   for (cnt = 0, i = 0; i < m; i++) {
1226     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1227       if (garray[B->j[jb]] > cs) break;
1228       colidxs[cnt++] = garray[B->j[jb]];
1229     }
1230     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1231     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1232   }
1233   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1234   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1235   PetscCall(PetscFree(colidxs));
1236 
1237   /* fill in and store nonzero values */
1238   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1239   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1240   PetscCall(PetscMalloc1(nz, &matvals));
1241   for (cnt = 0, i = 0; i < m; i++) {
1242     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1243       if (garray[B->j[jb]] > cs) break;
1244       matvals[cnt++] = ba[jb];
1245     }
1246     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1247     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1248   }
1249   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1250   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1251   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
1252   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1253   PetscCall(PetscFree(matvals));
1254 
1255   /* write block size option to the viewer's .info file */
1256   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1257   PetscFunctionReturn(PETSC_SUCCESS);
1258 }
1259 
1260 #include <petscdraw.h>
1261 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1262 {
1263   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1264   PetscMPIInt       rank = aij->rank, size = aij->size;
1265   PetscBool         isdraw, isascii, isbinary;
1266   PetscViewer       sviewer;
1267   PetscViewerFormat format;
1268 
1269   PetscFunctionBegin;
1270   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1271   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1272   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1273   if (isascii) {
1274     PetscCall(PetscViewerGetFormat(viewer, &format));
1275     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1276       PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
1277       PetscCall(PetscMalloc1(size, &nz));
1278       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1279       for (i = 0; i < size; i++) {
1280         nmax = PetscMax(nmax, nz[i]);
1281         nmin = PetscMin(nmin, nz[i]);
1282         navg += nz[i];
1283       }
1284       PetscCall(PetscFree(nz));
1285       navg = navg / size;
1286       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1287       PetscFunctionReturn(PETSC_SUCCESS);
1288     }
1289     PetscCall(PetscViewerGetFormat(viewer, &format));
1290     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1291       MatInfo   info;
1292       PetscInt *inodes = NULL;
1293 
1294       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1295       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1296       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1297       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1298       if (!inodes) {
1299         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1300                                                      info.memory));
1301       } else {
1302         PetscCall(
1303           PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
1304       }
1305       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1306       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1307       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1308       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1309       PetscCall(PetscViewerFlush(viewer));
1310       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1311       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1312       PetscCall(VecScatterView(aij->Mvctx, viewer));
1313       PetscFunctionReturn(PETSC_SUCCESS);
1314     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1315       PetscInt inodecount, inodelimit, *inodes;
1316       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1317       if (inodes) {
1318         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1319       } else {
1320         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1321       }
1322       PetscFunctionReturn(PETSC_SUCCESS);
1323     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1324       PetscFunctionReturn(PETSC_SUCCESS);
1325     }
1326   } else if (isbinary) {
1327     if (size == 1) {
1328       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1329       PetscCall(MatView(aij->A, viewer));
1330     } else {
1331       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1332     }
1333     PetscFunctionReturn(PETSC_SUCCESS);
1334   } else if (isascii && size == 1) {
1335     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1336     PetscCall(MatView(aij->A, viewer));
1337     PetscFunctionReturn(PETSC_SUCCESS);
1338   } else if (isdraw) {
1339     PetscDraw draw;
1340     PetscBool isnull;
1341     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1342     PetscCall(PetscDrawIsNull(draw, &isnull));
1343     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1344   }
1345 
1346   { /* assemble the entire matrix onto first processor */
1347     Mat A = NULL, Av;
1348     IS  isrow, iscol;
1349 
1350     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1351     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1352     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1353     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1354     /*  The commented code uses MatCreateSubMatrices instead */
1355     /*
1356     Mat *AA, A = NULL, Av;
1357     IS  isrow,iscol;
1358 
1359     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1360     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1361     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1362     if (rank == 0) {
1363        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1364        A    = AA[0];
1365        Av   = AA[0];
1366     }
1367     PetscCall(MatDestroySubMatrices(1,&AA));
1368 */
1369     PetscCall(ISDestroy(&iscol));
1370     PetscCall(ISDestroy(&isrow));
1371     /*
1372        Everyone has to call to draw the matrix since the graphics waits are
1373        synchronized across all processors that share the PetscDraw object
1374     */
1375     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1376     if (rank == 0) {
1377       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1378       PetscCall(MatView_SeqAIJ(Av, sviewer));
1379     }
1380     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1381     PetscCall(MatDestroy(&A));
1382   }
1383   PetscFunctionReturn(PETSC_SUCCESS);
1384 }
1385 
1386 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1387 {
1388   PetscBool isascii, isdraw, issocket, isbinary;
1389 
1390   PetscFunctionBegin;
1391   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
1392   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1393   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1394   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1395   if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1396   PetscFunctionReturn(PETSC_SUCCESS);
1397 }
1398 
1399 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1400 {
1401   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1402   Vec         bb1 = NULL;
1403   PetscBool   hasop;
1404 
1405   PetscFunctionBegin;
1406   if (flag == SOR_APPLY_UPPER) {
1407     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1408     PetscFunctionReturn(PETSC_SUCCESS);
1409   }
1410 
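       /* A work vector bb1 (to hold bb - B*x) is needed unless a single sweep with a zero initial guess and no Eisenstat trick is requested;
          note that ~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when the zero-initial-guess bit is NOT set */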
1411   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1412 
1413   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1414     if (flag & SOR_ZERO_INITIAL_GUESS) {
1415       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1416       its--;
1417     }
1418 
1419     while (its--) {
1420       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1421       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1422 
1423       /* update rhs: bb1 = bb - B*x */
1424       PetscCall(VecScale(mat->lvec, -1.0));
1425       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1426 
1427       /* local sweep */
1428       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1429     }
1430   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1431     if (flag & SOR_ZERO_INITIAL_GUESS) {
1432       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1433       its--;
1434     }
1435     while (its--) {
1436       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1437       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1438 
1439       /* update rhs: bb1 = bb - B*x */
1440       PetscCall(VecScale(mat->lvec, -1.0));
1441       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1442 
1443       /* local sweep */
1444       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1445     }
1446   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1447     if (flag & SOR_ZERO_INITIAL_GUESS) {
1448       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1449       its--;
1450     }
1451     while (its--) {
1452       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1453       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1454 
1455       /* update rhs: bb1 = bb - B*x */
1456       PetscCall(VecScale(mat->lvec, -1.0));
1457       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1458 
1459       /* local sweep */
1460       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1461     }
1462   } else if (flag & SOR_EISENSTAT) {
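         /* Eisenstat's trick: do one local backward sweep to get x, build the modified right-hand side
            bb1 = bb + ((omega-2)/omega) D x + B xghost, do one local forward sweep with bb1, and add the result to x */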
1463     Vec xx1;
1464 
1465     PetscCall(VecDuplicate(bb, &xx1));
1466     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1467 
1468     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1469     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1470     if (!mat->diag) {
1471       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1472       PetscCall(MatGetDiagonal(matin, mat->diag));
1473     }
1474     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1475     if (hasop) {
1476       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1477     } else {
1478       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1479     }
1480     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1481 
1482     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1483 
1484     /* local sweep */
1485     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1486     PetscCall(VecAXPY(xx, 1.0, xx1));
1487     PetscCall(VecDestroy(&xx1));
1488   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1489 
1490   PetscCall(VecDestroy(&bb1));
1491 
1492   matin->factorerrortype = mat->A->factorerrortype;
1493   PetscFunctionReturn(PETSC_SUCCESS);
1494 }
1495 
1496 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1497 {
1498   Mat             aA, aB, Aperm;
1499   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1500   PetscScalar    *aa, *ba;
1501   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1502   PetscSF         rowsf, sf;
1503   IS              parcolp = NULL;
1504   PetscBool       done;
1505 
1506   PetscFunctionBegin;
1507   PetscCall(MatGetLocalSize(A, &m, &n));
1508   PetscCall(ISGetIndices(rowp, &rwant));
1509   PetscCall(ISGetIndices(colp, &cwant));
1510   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1511 
1512   /* Invert row permutation to find out where my rows should go */
1513   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1514   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1515   PetscCall(PetscSFSetFromOptions(rowsf));
1516   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1517   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1518   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1519 
1520   /* Invert column permutation to find out where my columns should go */
1521   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1522   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1523   PetscCall(PetscSFSetFromOptions(sf));
1524   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1525   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1526   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1527   PetscCall(PetscSFDestroy(&sf));
1528 
1529   PetscCall(ISRestoreIndices(rowp, &rwant));
1530   PetscCall(ISRestoreIndices(colp, &cwant));
1531   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1532 
1533   /* Find out where my gcols should go */
1534   PetscCall(MatGetSize(aB, NULL, &ng));
1535   PetscCall(PetscMalloc1(ng, &gcdest));
1536   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1537   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1538   PetscCall(PetscSFSetFromOptions(sf));
1539   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1540   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1541   PetscCall(PetscSFDestroy(&sf));
1542 
1543   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1544   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1545   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1546   for (i = 0; i < m; i++) {
1547     PetscInt    row = rdest[i];
1548     PetscMPIInt rowner;
1549     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1550     for (j = ai[i]; j < ai[i + 1]; j++) {
1551       PetscInt    col = cdest[aj[j]];
1552       PetscMPIInt cowner;
1553       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1554       if (rowner == cowner) dnnz[i]++;
1555       else onnz[i]++;
1556     }
1557     for (j = bi[i]; j < bi[i + 1]; j++) {
1558       PetscInt    col = gcdest[bj[j]];
1559       PetscMPIInt cowner;
1560       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1561       if (rowner == cowner) dnnz[i]++;
1562       else onnz[i]++;
1563     }
1564   }
1565   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1566   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1567   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1568   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1569   PetscCall(PetscSFDestroy(&rowsf));
1570 
1571   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1572   PetscCall(MatSeqAIJGetArray(aA, &aa));
1573   PetscCall(MatSeqAIJGetArray(aB, &ba));
1574   for (i = 0; i < m; i++) {
1575     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1576     PetscInt  j0, rowlen;
1577     rowlen = ai[i + 1] - ai[i];
1578     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen can exceed m, the length of the repurposed scratch arrays, so insert the row in batches of at most m entries */
1579       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1580       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1581     }
1582     rowlen = bi[i + 1] - bi[i];
1583     for (j0 = j = 0; j < rowlen; j0 = j) {
1584       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1585       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1586     }
1587   }
1588   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1589   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1590   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1591   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1592   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1593   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1594   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1595   PetscCall(PetscFree3(work, rdest, cdest));
1596   PetscCall(PetscFree(gcdest));
1597   if (parcolp) PetscCall(ISDestroy(&colp));
1598   *B = Aperm;
1599   PetscFunctionReturn(PETSC_SUCCESS);
1600 }
1601 
1602 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1603 {
1604   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1605 
1606   PetscFunctionBegin;
1607   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1608   if (ghosts) *ghosts = aij->garray;
1609   PetscFunctionReturn(PETSC_SUCCESS);
1610 }
1611 
1612 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1613 {
1614   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1615   Mat            A = mat->A, B = mat->B;
1616   PetscLogDouble isend[5], irecv[5];
1617 
1618   PetscFunctionBegin;
1619   info->block_size = 1.0;
1620   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1621 
1622   isend[0] = info->nz_used;
1623   isend[1] = info->nz_allocated;
1624   isend[2] = info->nz_unneeded;
1625   isend[3] = info->memory;
1626   isend[4] = info->mallocs;
1627 
1628   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1629 
1630   isend[0] += info->nz_used;
1631   isend[1] += info->nz_allocated;
1632   isend[2] += info->nz_unneeded;
1633   isend[3] += info->memory;
1634   isend[4] += info->mallocs;
1635   if (flag == MAT_LOCAL) {
1636     info->nz_used      = isend[0];
1637     info->nz_allocated = isend[1];
1638     info->nz_unneeded  = isend[2];
1639     info->memory       = isend[3];
1640     info->mallocs      = isend[4];
1641   } else if (flag == MAT_GLOBAL_MAX) {
1642     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1643 
1644     info->nz_used      = irecv[0];
1645     info->nz_allocated = irecv[1];
1646     info->nz_unneeded  = irecv[2];
1647     info->memory       = irecv[3];
1648     info->mallocs      = irecv[4];
1649   } else if (flag == MAT_GLOBAL_SUM) {
1650     PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1651 
1652     info->nz_used      = irecv[0];
1653     info->nz_allocated = irecv[1];
1654     info->nz_unneeded  = irecv[2];
1655     info->memory       = irecv[3];
1656     info->mallocs      = irecv[4];
1657   }
1658   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1659   info->fill_ratio_needed = 0;
1660   info->factor_mallocs    = 0;
1661   PetscFunctionReturn(PETSC_SUCCESS);
1662 }
1663 
1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1665 {
1666   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1667 
1668   PetscFunctionBegin;
1669   switch (op) {
1670   case MAT_NEW_NONZERO_LOCATIONS:
1671   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1672   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1673   case MAT_KEEP_NONZERO_PATTERN:
1674   case MAT_NEW_NONZERO_LOCATION_ERR:
1675   case MAT_USE_INODES:
1676   case MAT_IGNORE_ZERO_ENTRIES:
1677   case MAT_FORM_EXPLICIT_TRANSPOSE:
1678     MatCheckPreallocated(A, 1);
1679     PetscCall(MatSetOption(a->A, op, flg));
1680     PetscCall(MatSetOption(a->B, op, flg));
1681     break;
1682   case MAT_ROW_ORIENTED:
1683     MatCheckPreallocated(A, 1);
1684     a->roworiented = flg;
1685 
1686     PetscCall(MatSetOption(a->A, op, flg));
1687     PetscCall(MatSetOption(a->B, op, flg));
1688     break;
1689   case MAT_IGNORE_OFF_PROC_ENTRIES:
1690     a->donotstash = flg;
1691     break;
1692   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1693   case MAT_SPD:
1694   case MAT_SYMMETRIC:
1695   case MAT_STRUCTURALLY_SYMMETRIC:
1696   case MAT_HERMITIAN:
1697   case MAT_SYMMETRY_ETERNAL:
1698   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1699   case MAT_SPD_ETERNAL:
1700     /* if the diagonal block is square it inherits some of the properties above */
1701     if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
1702     break;
1703   case MAT_SUBMAT_SINGLEIS:
1704     A->submat_singleis = flg;
1705     break;
1706   default:
1707     break;
1708   }
1709   PetscFunctionReturn(PETSC_SUCCESS);
1710 }
1711 
1712 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1713 {
1714   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1715   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1716   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1717   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1718   PetscInt    *cmap, *idx_p;
1719 
1720   PetscFunctionBegin;
1721   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1722   mat->getrowactive = PETSC_TRUE;
1723 
1724   if (!mat->rowvalues && (idx || v)) {
1725     /*
1726         allocate enough space to hold information from the longest row.
1727     */
1728     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1729     PetscInt    max = 1, tmp;
1730     for (i = 0; i < matin->rmap->n; i++) {
1731       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1732       if (max < tmp) max = tmp;
1733     }
1734     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1735   }
1736 
1737   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1738   lrow = row - rstart;
1739 
1740   pvA = &vworkA;
1741   pcA = &cworkA;
1742   pvB = &vworkB;
1743   pcB = &cworkB;
1744   if (!v) {
1745     pvA = NULL;
1746     pvB = NULL;
1747   }
1748   if (!idx) {
1749     pcA = NULL;
1750     if (!v) pcB = NULL;
1751   }
1752   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1753   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1754   nztot = nzA + nzB;
1755 
1756   cmap = mat->garray;
1757   if (v || idx) {
1758     if (nztot) {
1759       /* Sort by increasing column numbers, assuming A and B already sorted */
1760       PetscInt imark = -1;
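           /* imark counts the off-diagonal entries whose global column lies before the diagonal block; they are
              copied first, then the diagonal-block entries, then the remaining off-diagonal entries */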
1761       if (v) {
1762         *v = v_p = mat->rowvalues;
1763         for (i = 0; i < nzB; i++) {
1764           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1765           else break;
1766         }
1767         imark = i;
1768         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1769         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1770       }
1771       if (idx) {
1772         *idx = idx_p = mat->rowindices;
1773         if (imark > -1) {
1774           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1775         } else {
1776           for (i = 0; i < nzB; i++) {
1777             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1778             else break;
1779           }
1780           imark = i;
1781         }
1782         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1783         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1784       }
1785     } else {
1786       if (idx) *idx = NULL;
1787       if (v) *v = NULL;
1788     }
1789   }
1790   *nz = nztot;
1791   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1792   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1793   PetscFunctionReturn(PETSC_SUCCESS);
1794 }
1795 
1796 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1797 {
1798   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1799 
1800   PetscFunctionBegin;
1801   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1802   aij->getrowactive = PETSC_FALSE;
1803   PetscFunctionReturn(PETSC_SUCCESS);
1804 }
1805 
1806 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1807 {
1808   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1809   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1810   PetscInt         i, j, cstart = mat->cmap->rstart;
1811   PetscReal        sum = 0.0;
1812   const MatScalar *v, *amata, *bmata;
1813 
1814   PetscFunctionBegin;
1815   if (aij->size == 1) {
1816     PetscCall(MatNorm(aij->A, type, norm));
1817   } else {
1818     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1819     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1820     if (type == NORM_FROBENIUS) {
1821       v = amata;
1822       for (i = 0; i < amat->nz; i++) {
1823         sum += PetscRealPart(PetscConj(*v) * (*v));
1824         v++;
1825       }
1826       v = bmata;
1827       for (i = 0; i < bmat->nz; i++) {
1828         sum += PetscRealPart(PetscConj(*v) * (*v));
1829         v++;
1830       }
1831       PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1832       *norm = PetscSqrtReal(*norm);
1833       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1834     } else if (type == NORM_1) { /* max column norm */
1835       PetscReal *tmp;
1836       PetscInt  *jj, *garray = aij->garray;
1837       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
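           /* accumulate |a_ij| into tmp[] by global column, sum the partial column sums across all ranks, then take the largest */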
1838       *norm = 0.0;
1839       v     = amata;
1840       jj    = amat->j;
1841       for (j = 0; j < amat->nz; j++) {
1842         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1843         v++;
1844       }
1845       v  = bmata;
1846       jj = bmat->j;
1847       for (j = 0; j < bmat->nz; j++) {
1848         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1849         v++;
1850       }
1851       PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1852       for (j = 0; j < mat->cmap->N; j++) {
1853         if (tmp[j] > *norm) *norm = tmp[j];
1854       }
1855       PetscCall(PetscFree(tmp));
1856       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1857     } else if (type == NORM_INFINITY) { /* max row norm */
1858       PetscReal ntemp = 0.0;
1859       for (j = 0; j < aij->A->rmap->n; j++) {
1860         v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
1861         sum = 0.0;
1862         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1863           sum += PetscAbsScalar(*v);
1864           v++;
1865         }
1866         v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
1867         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1868           sum += PetscAbsScalar(*v);
1869           v++;
1870         }
1871         if (sum > ntemp) ntemp = sum;
1872       }
1873       PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1874       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1875     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1876     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1877     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1878   }
1879   PetscFunctionReturn(PETSC_SUCCESS);
1880 }
1881 
1882 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1883 {
1884   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1885   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1886   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1887   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1888   Mat              B, A_diag, *B_diag;
1889   const MatScalar *pbv, *bv;
1890 
1891   PetscFunctionBegin;
1892   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1893   ma = A->rmap->n;
1894   na = A->cmap->n;
1895   mb = a->B->rmap->n;
1896   nb = a->B->cmap->n;
1897   ai = Aloc->i;
1898   aj = Aloc->j;
1899   bi = Bloc->i;
1900   bj = Bloc->j;
1901   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1902     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1903     PetscSFNode         *oloc;
1904     PETSC_UNUSED PetscSF sf;
1905 
1906     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1907     /* compute d_nnz for preallocation */
1908     PetscCall(PetscArrayzero(d_nnz, na));
1909     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1910     /* compute local off-diagonal contributions */
1911     PetscCall(PetscArrayzero(g_nnz, nb));
1912     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1913     /* map those to global */
1914     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1915     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1916     PetscCall(PetscSFSetFromOptions(sf));
1917     PetscCall(PetscArrayzero(o_nnz, na));
1918     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1919     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1920     PetscCall(PetscSFDestroy(&sf));
1921 
1922     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1923     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1924     PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
1925     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1926     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1927     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1928   } else {
1929     B = *matout;
1930     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1931   }
1932 
1933   b           = (Mat_MPIAIJ *)B->data;
1934   A_diag      = a->A;
1935   B_diag      = &b->A;
1936   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1937   A_diag_ncol = A_diag->cmap->N;
1938   B_diag_ilen = sub_B_diag->ilen;
1939   B_diag_i    = sub_B_diag->i;
1940 
1941   /* Set ilen for diagonal of B */
1942   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1943 
1944   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1945   very quickly (without using MatSetValues) because all writes are local. */
1946   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1947   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1948 
1949   /* copy over the B part */
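       /* each local row of the off-diagonal block becomes a column of the transpose: it is inserted as ncol rows
          (global columns taken from garray) by one column (the global row number), and MatSetValues() routes the
          entries to the ranks that own them */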
1950   PetscCall(PetscMalloc1(bi[mb], &cols));
1951   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1952   pbv = bv;
1953   row = A->rmap->rstart;
1954   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1955   cols_tmp = cols;
1956   for (i = 0; i < mb; i++) {
1957     ncol = bi[i + 1] - bi[i];
1958     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1959     row++;
1960     if (pbv) pbv += ncol;
1961     if (cols_tmp) cols_tmp += ncol;
1962   }
1963   PetscCall(PetscFree(cols));
1964   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1965 
1966   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1967   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1968   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1969     *matout = B;
1970   } else {
1971     PetscCall(MatHeaderMerge(A, &B));
1972   }
1973   PetscFunctionReturn(PETSC_SUCCESS);
1974 }
1975 
1976 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
1977 {
1978   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1979   Mat         a = aij->A, b = aij->B;
1980   PetscInt    s1, s2, s3;
1981 
1982   PetscFunctionBegin;
1983   PetscCall(MatGetLocalSize(mat, &s2, &s3));
1984   if (rr) {
1985     PetscCall(VecGetLocalSize(rr, &s1));
1986     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
1987     /* Overlap communication with computation. */
1988     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1989   }
1990   if (ll) {
1991     PetscCall(VecGetLocalSize(ll, &s1));
1992     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1993     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1994   }
1995   /* scale  the diagonal block */
1996   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1997 
1998   if (rr) {
1999     /* Do a scatter end and then right scale the off-diagonal block */
2000     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2001     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2002   }
2003   PetscFunctionReturn(PETSC_SUCCESS);
2004 }
2005 
2006 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2007 {
2008   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2009 
2010   PetscFunctionBegin;
2011   PetscCall(MatSetUnfactored(a->A));
2012   PetscFunctionReturn(PETSC_SUCCESS);
2013 }
2014 
2015 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2016 {
2017   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2018   Mat         a, b, c, d;
2019   PetscBool   flg;
2020 
2021   PetscFunctionBegin;
2022   a = matA->A;
2023   b = matA->B;
2024   c = matB->A;
2025   d = matB->B;
2026 
2027   PetscCall(MatEqual(a, c, &flg));
2028   if (flg) PetscCall(MatEqual(b, d, &flg));
2029   PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2030   PetscFunctionReturn(PETSC_SUCCESS);
2031 }
2032 
2033 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2034 {
2035   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2036   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2037 
2038   PetscFunctionBegin;
2039   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2040   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2041     /* because of the column compression in the off-processor part of the matrix a->B,
2042        the number of columns in a->B and b->B may be different, hence we cannot call
2043        the MatCopy() directly on the two parts. If need be, we can provide a more
2044        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2045        then copying the submatrices */
2046     PetscCall(MatCopy_Basic(A, B, str));
2047   } else {
2048     PetscCall(MatCopy(a->A, b->A, str));
2049     PetscCall(MatCopy(a->B, b->B, str));
2050   }
2051   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2052   PetscFunctionReturn(PETSC_SUCCESS);
2053 }
2054 
2055 /*
2056    Computes the number of nonzeros per row needed for preallocation when X and Y
2057    have different nonzero structure.
2058 */
2059 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2060 {
2061   PetscInt i, j, k, nzx, nzy;
2062 
2063   PetscFunctionBegin;
2064   /* Set the number of nonzeros in the new matrix */
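       /* nnz[i] is the size of the union of the column sets of row i of X and row i of Y, compared in global
          numbering through the local-to-global maps xltog and yltog */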
2065   for (i = 0; i < m; i++) {
2066     const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
2067     nzx    = xi[i + 1] - xi[i];
2068     nzy    = yi[i + 1] - yi[i];
2069     nnz[i] = 0;
2070     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2071       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2072       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2073       nnz[i]++;
2074     }
2075     for (; k < nzy; k++) nnz[i]++;
2076   }
2077   PetscFunctionReturn(PETSC_SUCCESS);
2078 }
2079 
2080 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2081 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2082 {
2083   PetscInt    m = Y->rmap->N;
2084   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2085   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2086 
2087   PetscFunctionBegin;
2088   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2089   PetscFunctionReturn(PETSC_SUCCESS);
2090 }
2091 
2092 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2093 {
2094   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2095 
2096   PetscFunctionBegin;
2097   if (str == SAME_NONZERO_PATTERN) {
2098     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2099     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2100   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2101     PetscCall(MatAXPY_Basic(Y, a, X, str));
2102   } else {
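         /* different nonzero pattern: preallocate a fresh matrix that can hold the union of the two patterns,
            compute Y + a*X into it, then replace Y's internal data with the new matrix via MatHeaderMerge() */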
2103     Mat       B;
2104     PetscInt *nnz_d, *nnz_o;
2105 
2106     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2107     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2108     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2109     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2110     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2111     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2112     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2113     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2114     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2115     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2116     PetscCall(MatHeaderMerge(Y, &B));
2117     PetscCall(PetscFree(nnz_d));
2118     PetscCall(PetscFree(nnz_o));
2119   }
2120   PetscFunctionReturn(PETSC_SUCCESS);
2121 }
2122 
2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2124 
2125 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2126 {
2127   PetscFunctionBegin;
2128   if (PetscDefined(USE_COMPLEX)) {
2129     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2130 
2131     PetscCall(MatConjugate_SeqAIJ(aij->A));
2132     PetscCall(MatConjugate_SeqAIJ(aij->B));
2133   }
2134   PetscFunctionReturn(PETSC_SUCCESS);
2135 }
2136 
2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2138 {
2139   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2140 
2141   PetscFunctionBegin;
2142   PetscCall(MatRealPart(a->A));
2143   PetscCall(MatRealPart(a->B));
2144   PetscFunctionReturn(PETSC_SUCCESS);
2145 }
2146 
2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2148 {
2149   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2150 
2151   PetscFunctionBegin;
2152   PetscCall(MatImaginaryPart(a->A));
2153   PetscCall(MatImaginaryPart(a->B));
2154   PetscFunctionReturn(PETSC_SUCCESS);
2155 }
2156 
2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2158 {
2159   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2160   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2161   PetscScalar       *vv;
2162   Vec                vB, vA;
2163   const PetscScalar *va, *vb;
2164 
2165   PetscFunctionBegin;
2166   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2167   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2168 
2169   PetscCall(VecGetArrayRead(vA, &va));
2170   if (idx) {
2171     for (i = 0; i < m; i++) {
2172       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2173     }
2174   }
2175 
2176   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2177   PetscCall(PetscMalloc1(m, &idxb));
2178   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2179 
2180   PetscCall(VecGetArrayWrite(v, &vv));
2181   PetscCall(VecGetArrayRead(vB, &vb));
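       /* combine the two partial results: keep whichever of the diagonal-block and off-diagonal maxima has the
          larger magnitude; on a tie, report the smaller global column index */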
2182   for (i = 0; i < m; i++) {
2183     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2184       vv[i] = vb[i];
2185       if (idx) idx[i] = a->garray[idxb[i]];
2186     } else {
2187       vv[i] = va[i];
2188       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2189     }
2190   }
2191   PetscCall(VecRestoreArrayWrite(v, &vv));
2192   PetscCall(VecRestoreArrayRead(vA, &va));
2193   PetscCall(VecRestoreArrayRead(vB, &vb));
2194   PetscCall(PetscFree(idxb));
2195   PetscCall(VecDestroy(&vA));
2196   PetscCall(VecDestroy(&vB));
2197   PetscFunctionReturn(PETSC_SUCCESS);
2198 }
2199 
2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
2201 {
2202   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2203   Vec         vB, vA;
2204 
2205   PetscFunctionBegin;
2206   PetscCall(MatCreateVecs(a->A, NULL, &vA));
2207   PetscCall(MatGetRowSumAbs(a->A, vA));
2208   PetscCall(MatCreateVecs(a->B, NULL, &vB));
2209   PetscCall(MatGetRowSumAbs(a->B, vB));
2210   PetscCall(VecAXPY(vA, 1.0, vB));
2211   PetscCall(VecDestroy(&vB));
2212   PetscCall(VecCopy(vA, v));
2213   PetscCall(VecDestroy(&vA));
2214   PetscFunctionReturn(PETSC_SUCCESS);
2215 }
2216 
2217 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2218 {
2219   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2220   PetscInt           m = A->rmap->n, n = A->cmap->n;
2221   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2222   PetscInt          *cmap = mat->garray;
2223   PetscInt          *diagIdx, *offdiagIdx;
2224   Vec                diagV, offdiagV;
2225   PetscScalar       *a, *diagA, *offdiagA;
2226   const PetscScalar *ba, *bav;
2227   PetscInt           r, j, col, ncols, *bi, *bj;
2228   Mat                B = mat->B;
2229   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2230 
2231   PetscFunctionBegin;
2232   /* Handle the degenerate layout in which one process holds all the columns of A and the others hold none */
2233   if (A->cmap->N == n) {
2234     PetscCall(VecGetArrayWrite(v, &diagA));
2235     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2236     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2237     PetscCall(VecDestroy(&diagV));
2238     PetscCall(VecRestoreArrayWrite(v, &diagA));
2239     PetscFunctionReturn(PETSC_SUCCESS);
2240   } else if (n == 0) {
2241     if (m) {
2242       PetscCall(VecGetArrayWrite(v, &a));
2243       for (r = 0; r < m; r++) {
2244         a[r] = 0.0;
2245         if (idx) idx[r] = -1;
2246       }
2247       PetscCall(VecRestoreArrayWrite(v, &a));
2248     }
2249     PetscFunctionReturn(PETSC_SUCCESS);
2250   }
2251 
2252   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2253   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2254   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2255   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2256 
2257   /* Get offdiagIdx[] for implicit 0.0 */
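       /* a sparse off-diagonal row contains at least one implicit 0.0, which is the off-diagonal minimum in absolute
          value; the loop below records the global column of the first such implicit zero in offdiagIdx[] */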
2258   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2259   ba = bav;
2260   bi = b->i;
2261   bj = b->j;
2262   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2263   for (r = 0; r < m; r++) {
2264     ncols = bi[r + 1] - bi[r];
2265     if (ncols == A->cmap->N - n) { /* Brow is dense */
2266       offdiagA[r]   = *ba;
2267       offdiagIdx[r] = cmap[0];
2268     } else { /* Brow is sparse, so it has an implicit 0.0 and the off-diagonal minimum absolute value is 0.0 */
2269       offdiagA[r] = 0.0;
2270 
2271       /* Find first hole in the cmap */
2272       for (j = 0; j < ncols; j++) {
2273         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2274         if (col > j && j < cstart) {
2275           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2276           break;
2277         } else if (col > j + n && j >= cstart) {
2278           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2279           break;
2280         }
2281       }
2282       if (j == ncols && ncols < A->cmap->N - n) {
2283         /* a hole is outside compressed Bcols */
2284         if (ncols == 0) {
2285           if (cstart) {
2286             offdiagIdx[r] = 0;
2287           } else offdiagIdx[r] = cend;
2288         } else { /* ncols > 0 */
2289           offdiagIdx[r] = cmap[ncols - 1] + 1;
2290           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2291         }
2292       }
2293     }
2294 
2295     for (j = 0; j < ncols; j++) {
2296       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2297         offdiagA[r]   = *ba;
2298         offdiagIdx[r] = cmap[*bj];
2299       }
2300       ba++;
2301       bj++;
2302     }
2303   }
2304 
2305   PetscCall(VecGetArrayWrite(v, &a));
2306   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2307   for (r = 0; r < m; ++r) {
2308     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2309       a[r] = diagA[r];
2310       if (idx) idx[r] = cstart + diagIdx[r];
2311     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2312       a[r] = diagA[r];
2313       if (idx) {
2314         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2315           idx[r] = cstart + diagIdx[r];
2316         } else idx[r] = offdiagIdx[r];
2317       }
2318     } else {
2319       a[r] = offdiagA[r];
2320       if (idx) idx[r] = offdiagIdx[r];
2321     }
2322   }
2323   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2324   PetscCall(VecRestoreArrayWrite(v, &a));
2325   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2326   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2327   PetscCall(VecDestroy(&diagV));
2328   PetscCall(VecDestroy(&offdiagV));
2329   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2330   PetscFunctionReturn(PETSC_SUCCESS);
2331 }
2332 
2333 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2334 {
2335   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2336   PetscInt           m = A->rmap->n, n = A->cmap->n;
2337   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2338   PetscInt          *cmap = mat->garray;
2339   PetscInt          *diagIdx, *offdiagIdx;
2340   Vec                diagV, offdiagV;
2341   PetscScalar       *a, *diagA, *offdiagA;
2342   const PetscScalar *ba, *bav;
2343   PetscInt           r, j, col, ncols, *bi, *bj;
2344   Mat                B = mat->B;
2345   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2346 
2347   PetscFunctionBegin;
2348   /* Handle the degenerate layout in which one process holds all the columns of A and the others hold none */
2349   if (A->cmap->N == n) {
2350     PetscCall(VecGetArrayWrite(v, &diagA));
2351     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2352     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2353     PetscCall(VecDestroy(&diagV));
2354     PetscCall(VecRestoreArrayWrite(v, &diagA));
2355     PetscFunctionReturn(PETSC_SUCCESS);
2356   } else if (n == 0) {
2357     if (m) {
2358       PetscCall(VecGetArrayWrite(v, &a));
2359       for (r = 0; r < m; r++) {
2360         a[r] = PETSC_MAX_REAL;
2361         if (idx) idx[r] = -1;
2362       }
2363       PetscCall(VecRestoreArrayWrite(v, &a));
2364     }
2365     PetscFunctionReturn(PETSC_SUCCESS);
2366   }
2367 
2368   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2369   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2370   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2371   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2372 
2373   /* Get offdiagIdx[] for implicit 0.0 */
2374   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2375   ba = bav;
2376   bi = b->i;
2377   bj = b->j;
2378   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2379   for (r = 0; r < m; r++) {
2380     ncols = bi[r + 1] - bi[r];
2381     if (ncols == A->cmap->N - n) { /* Brow is dense */
2382       offdiagA[r]   = *ba;
2383       offdiagIdx[r] = cmap[0];
2384     } else { /* Brow is sparse, so it has an implicit 0.0 and the off-diagonal minimum is at most 0.0 */
2385       offdiagA[r] = 0.0;
2386 
2387       /* Find first hole in the cmap */
2388       for (j = 0; j < ncols; j++) {
2389         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2390         if (col > j && j < cstart) {
2391           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2392           break;
2393         } else if (col > j + n && j >= cstart) {
2394           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2395           break;
2396         }
2397       }
2398       if (j == ncols && ncols < A->cmap->N - n) {
2399         /* a hole is outside compressed Bcols */
2400         if (ncols == 0) {
2401           if (cstart) {
2402             offdiagIdx[r] = 0;
2403           } else offdiagIdx[r] = cend;
2404         } else { /* ncols > 0 */
2405           offdiagIdx[r] = cmap[ncols - 1] + 1;
2406           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2407         }
2408       }
2409     }
2410 
2411     for (j = 0; j < ncols; j++) {
2412       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2413         offdiagA[r]   = *ba;
2414         offdiagIdx[r] = cmap[*bj];
2415       }
2416       ba++;
2417       bj++;
2418     }
2419   }
2420 
2421   PetscCall(VecGetArrayWrite(v, &a));
2422   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2423   for (r = 0; r < m; ++r) {
2424     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2425       a[r] = diagA[r];
2426       if (idx) idx[r] = cstart + diagIdx[r];
2427     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2428       a[r] = diagA[r];
2429       if (idx) {
2430         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2431           idx[r] = cstart + diagIdx[r];
2432         } else idx[r] = offdiagIdx[r];
2433       }
2434     } else {
2435       a[r] = offdiagA[r];
2436       if (idx) idx[r] = offdiagIdx[r];
2437     }
2438   }
2439   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2440   PetscCall(VecRestoreArrayWrite(v, &a));
2441   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2442   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2443   PetscCall(VecDestroy(&diagV));
2444   PetscCall(VecDestroy(&offdiagV));
2445   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2446   PetscFunctionReturn(PETSC_SUCCESS);
2447 }
2448 
2449 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2450 {
2451   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2452   PetscInt           m = A->rmap->n, n = A->cmap->n;
2453   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2454   PetscInt          *cmap = mat->garray;
2455   PetscInt          *diagIdx, *offdiagIdx;
2456   Vec                diagV, offdiagV;
2457   PetscScalar       *a, *diagA, *offdiagA;
2458   const PetscScalar *ba, *bav;
2459   PetscInt           r, j, col, ncols, *bi, *bj;
2460   Mat                B = mat->B;
2461   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2462 
2463   PetscFunctionBegin;
2464   /* Handle the degenerate layout in which one process holds all the columns of A and the others hold none */
2465   if (A->cmap->N == n) {
2466     PetscCall(VecGetArrayWrite(v, &diagA));
2467     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2468     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2469     PetscCall(VecDestroy(&diagV));
2470     PetscCall(VecRestoreArrayWrite(v, &diagA));
2471     PetscFunctionReturn(PETSC_SUCCESS);
2472   } else if (n == 0) {
2473     if (m) {
2474       PetscCall(VecGetArrayWrite(v, &a));
2475       for (r = 0; r < m; r++) {
2476         a[r] = PETSC_MIN_REAL;
2477         if (idx) idx[r] = -1;
2478       }
2479       PetscCall(VecRestoreArrayWrite(v, &a));
2480     }
2481     PetscFunctionReturn(PETSC_SUCCESS);
2482   }
2483 
2484   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2485   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2486   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2487   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2488 
2489   /* Get offdiagIdx[] for implicit 0.0 */
2490   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2491   ba = bav;
2492   bi = b->i;
2493   bj = b->j;
2494   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2495   for (r = 0; r < m; r++) {
2496     ncols = bi[r + 1] - bi[r];
2497     if (ncols == A->cmap->N - n) { /* Brow is dense */
2498       offdiagA[r]   = *ba;
2499       offdiagIdx[r] = cmap[0];
2500     } else { /* Brow is sparse, so it has an implicit 0.0 and the off-diagonal maximum is at least 0.0 */
2501       offdiagA[r] = 0.0;
2502 
2503       /* Find first hole in the cmap */
2504       for (j = 0; j < ncols; j++) {
2505         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2506         if (col > j && j < cstart) {
2507           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2508           break;
2509         } else if (col > j + n && j >= cstart) {
2510           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2511           break;
2512         }
2513       }
2514       if (j == ncols && ncols < A->cmap->N - n) {
2515         /* a hole is outside compressed Bcols */
2516         if (ncols == 0) {
2517           if (cstart) {
2518             offdiagIdx[r] = 0;
2519           } else offdiagIdx[r] = cend;
2520         } else { /* ncols > 0 */
2521           offdiagIdx[r] = cmap[ncols - 1] + 1;
2522           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2523         }
2524       }
2525     }
2526 
2527     for (j = 0; j < ncols; j++) {
2528       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2529         offdiagA[r]   = *ba;
2530         offdiagIdx[r] = cmap[*bj];
2531       }
2532       ba++;
2533       bj++;
2534     }
2535   }
2536 
2537   PetscCall(VecGetArrayWrite(v, &a));
2538   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2539   for (r = 0; r < m; ++r) {
2540     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2541       a[r] = diagA[r];
2542       if (idx) idx[r] = cstart + diagIdx[r];
2543     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2544       a[r] = diagA[r];
2545       if (idx) {
2546         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2547           idx[r] = cstart + diagIdx[r];
2548         } else idx[r] = offdiagIdx[r];
2549       }
2550     } else {
2551       a[r] = offdiagA[r];
2552       if (idx) idx[r] = offdiagIdx[r];
2553     }
2554   }
2555   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2556   PetscCall(VecRestoreArrayWrite(v, &a));
2557   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2558   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2559   PetscCall(VecDestroy(&diagV));
2560   PetscCall(VecDestroy(&offdiagV));
2561   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2562   PetscFunctionReturn(PETSC_SUCCESS);
2563 }
2564 
2565 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2566 {
2567   Mat *dummy;
2568 
2569   PetscFunctionBegin;
2570   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2571   *newmat = *dummy;
2572   PetscCall(PetscFree(dummy));
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2577 {
2578   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2579 
2580   PetscFunctionBegin;
2581   PetscCall(MatInvertBlockDiagonal(a->A, values));
2582   A->factorerrortype = a->A->factorerrortype;
2583   PetscFunctionReturn(PETSC_SUCCESS);
2584 }
2585 
2586 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2587 {
2588   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2589 
2590   PetscFunctionBegin;
2591   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2592   PetscCall(MatSetRandom(aij->A, rctx));
2593   if (x->assembled) {
2594     PetscCall(MatSetRandom(aij->B, rctx));
2595   } else {
2596     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2597   }
2598   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2599   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2600   PetscFunctionReturn(PETSC_SUCCESS);
2601 }
2602 
2603 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2604 {
2605   PetscFunctionBegin;
2606   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2607   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2608   PetscFunctionReturn(PETSC_SUCCESS);
2609 }
2610 
2611 /*@
2612   MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2613 
2614   Not Collective
2615 
2616   Input Parameter:
2617 . A - the matrix
2618 
2619   Output Parameter:
2620 . nz - the number of nonzeros
2621 
2622   Level: advanced
2623 
2624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2625 @*/
2626 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2627 {
2628   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2629   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2630   PetscBool   isaij;
2631 
2632   PetscFunctionBegin;
2633   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
2634   PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
2635   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2636   PetscFunctionReturn(PETSC_SUCCESS);
2637 }
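
/*
  A minimal usage sketch (illustrative only, not part of the library source; A is assumed to be an
  assembled MATMPIAIJ matrix): query the number of locally stored nonzeros and print it on each rank.

    PetscCount nz;

    PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "local nonzeros: %" PetscInt_FMT "\n", (PetscInt)nz));

  The cast to PetscInt assumes the local count fits in a PetscInt.
*/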
2638 
2639 /*@
2640   MatMPIAIJSetUseScalableIncreaseOverlap - Set whether the matrix uses a scalable algorithm to compute the overlap in `MatIncreaseOverlap()`
2641 
2642   Collective
2643 
2644   Input Parameters:
2645 + A  - the matrix
2646 - sc - `PETSC_TRUE` indicates the scalable algorithm should be used (the default is the non-scalable algorithm)
2647 
2648   Level: advanced
2649 
2650 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
2651 @*/
2652 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2653 {
2654   PetscFunctionBegin;
2655   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2656   PetscFunctionReturn(PETSC_SUCCESS);
2657 }
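
/*
  A minimal usage sketch (illustrative only; A is assumed to be a MATMPIAIJ matrix): select the
  scalable overlap algorithm programmatically, or equivalently at runtime via the option handled
  in MatSetFromOptions_MPIAIJ() below (which requires MatSetFromOptions() to be called on the matrix).

    PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));

  Command line equivalent:

    -mat_increase_overlap_scalable
*/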
2658 
2659 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
2660 {
2661   PetscBool sc = PETSC_FALSE, flg;
2662 
2663   PetscFunctionBegin;
2664   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2665   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2666   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2667   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2668   PetscOptionsHeadEnd();
2669   PetscFunctionReturn(PETSC_SUCCESS);
2670 }
2671 
2672 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2673 {
2674   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2675   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2676 
2677   PetscFunctionBegin;
2678   if (!Y->preallocated) {
2679     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2680   } else if (!aij->nz) { /* It does not matter if the diagonal of Y lies only partially in maij->A; we just need an estimated preallocation. */
2681     PetscInt nonew = aij->nonew;
2682     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2683     aij->nonew = nonew;
2684   }
2685   PetscCall(MatShift_Basic(Y, a));
2686   PetscFunctionReturn(PETSC_SUCCESS);
2687 }
2688 
2689 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2690 {
2691   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2692 
2693   PetscFunctionBegin;
2694   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2695   PetscCall(MatMissingDiagonal(a->A, missing, d));
2696   if (d) {
2697     PetscInt rstart;
2698     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2699     *d += rstart;
2700   }
2701   PetscFunctionReturn(PETSC_SUCCESS);
2702 }
2703 
2704 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2705 {
2706   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2707 
2708   PetscFunctionBegin;
2709   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
2714 {
2715   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
2719   PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
2720   PetscFunctionReturn(PETSC_SUCCESS);
2721 }
2722 
2723 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2724                                        MatGetRow_MPIAIJ,
2725                                        MatRestoreRow_MPIAIJ,
2726                                        MatMult_MPIAIJ,
2727                                        /* 4*/ MatMultAdd_MPIAIJ,
2728                                        MatMultTranspose_MPIAIJ,
2729                                        MatMultTransposeAdd_MPIAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        /*10*/ NULL,
2734                                        NULL,
2735                                        NULL,
2736                                        MatSOR_MPIAIJ,
2737                                        MatTranspose_MPIAIJ,
2738                                        /*15*/ MatGetInfo_MPIAIJ,
2739                                        MatEqual_MPIAIJ,
2740                                        MatGetDiagonal_MPIAIJ,
2741                                        MatDiagonalScale_MPIAIJ,
2742                                        MatNorm_MPIAIJ,
2743                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2744                                        MatAssemblyEnd_MPIAIJ,
2745                                        MatSetOption_MPIAIJ,
2746                                        MatZeroEntries_MPIAIJ,
2747                                        /*24*/ MatZeroRows_MPIAIJ,
2748                                        NULL,
2749                                        NULL,
2750                                        NULL,
2751                                        NULL,
2752                                        /*29*/ MatSetUp_MPI_Hash,
2753                                        NULL,
2754                                        NULL,
2755                                        MatGetDiagonalBlock_MPIAIJ,
2756                                        NULL,
2757                                        /*34*/ MatDuplicate_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                        NULL,
2761                                        NULL,
2762                                        /*39*/ MatAXPY_MPIAIJ,
2763                                        MatCreateSubMatrices_MPIAIJ,
2764                                        MatIncreaseOverlap_MPIAIJ,
2765                                        MatGetValues_MPIAIJ,
2766                                        MatCopy_MPIAIJ,
2767                                        /*44*/ MatGetRowMax_MPIAIJ,
2768                                        MatScale_MPIAIJ,
2769                                        MatShift_MPIAIJ,
2770                                        MatDiagonalSet_MPIAIJ,
2771                                        MatZeroRowsColumns_MPIAIJ,
2772                                        /*49*/ MatSetRandom_MPIAIJ,
2773                                        MatGetRowIJ_MPIAIJ,
2774                                        MatRestoreRowIJ_MPIAIJ,
2775                                        NULL,
2776                                        NULL,
2777                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2778                                        NULL,
2779                                        MatSetUnfactored_MPIAIJ,
2780                                        MatPermute_MPIAIJ,
2781                                        NULL,
2782                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2783                                        MatDestroy_MPIAIJ,
2784                                        MatView_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                        /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2788                                        NULL,
2789                                        NULL,
2790                                        NULL,
2791                                        MatGetRowMaxAbs_MPIAIJ,
2792                                        /*69*/ MatGetRowMinAbs_MPIAIJ,
2793                                        NULL,
2794                                        NULL,
2795                                        MatFDColoringApply_AIJ,
2796                                        MatSetFromOptions_MPIAIJ,
2797                                        MatFindZeroDiagonals_MPIAIJ,
2798                                        /*75*/ NULL,
2799                                        NULL,
2800                                        NULL,
2801                                        MatLoad_MPIAIJ,
2802                                        NULL,
2803                                        /*80*/ NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*83*/ NULL,
2807                                        NULL,
2808                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2809                                        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2810                                        NULL,
2811                                        NULL,
2812                                        /*89*/ MatBindToCPU_MPIAIJ,
2813                                        MatProductSetFromOptions_MPIAIJ,
2814                                        NULL,
2815                                        NULL,
2816                                        MatConjugate_MPIAIJ,
2817                                        /*94*/ NULL,
2818                                        MatSetValuesRow_MPIAIJ,
2819                                        MatRealPart_MPIAIJ,
2820                                        MatImaginaryPart_MPIAIJ,
2821                                        NULL,
2822                                        /*99*/ NULL,
2823                                        NULL,
2824                                        NULL,
2825                                        MatGetRowMin_MPIAIJ,
2826                                        NULL,
2827                                        /*104*/ MatMissingDiagonal_MPIAIJ,
2828                                        MatGetSeqNonzeroStructure_MPIAIJ,
2829                                        NULL,
2830                                        MatGetGhosts_MPIAIJ,
2831                                        NULL,
2832                                        /*109*/ NULL,
2833                                        MatMultDiagonalBlock_MPIAIJ,
2834                                        NULL,
2835                                        NULL,
2836                                        NULL,
2837                                        /*114*/ MatGetMultiProcBlock_MPIAIJ,
2838                                        MatFindNonzeroRows_MPIAIJ,
2839                                        MatGetColumnReductions_MPIAIJ,
2840                                        MatInvertBlockDiagonal_MPIAIJ,
2841                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2842                                        /*119*/ MatCreateSubMatricesMPI_MPIAIJ,
2843                                        NULL,
2844                                        NULL,
2845                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2846                                        NULL,
2847                                        /*124*/ NULL,
2848                                        NULL,
2849                                        NULL,
2850                                        MatSetBlockSizes_MPIAIJ,
2851                                        NULL,
2852                                        /*129*/ MatFDColoringSetUp_MPIXAIJ,
2853                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2854                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2855                                        NULL,
2856                                        NULL,
2857                                        /*134*/ NULL,
2858                                        MatCreateGraph_Simple_AIJ,
2859                                        NULL,
2860                                        MatEliminateZeros_MPIAIJ,
2861                                        MatGetRowSumAbs_MPIAIJ,
2862                                        /*139*/ NULL,
2863                                        NULL,
2864                                        NULL,
2865                                        MatCopyHashToXAIJ_MPI_Hash,
2866                                        MatGetCurrentMemType_MPIAIJ};
2867 
2868 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2869 {
2870   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2871 
2872   PetscFunctionBegin;
2873   PetscCall(MatStoreValues(aij->A));
2874   PetscCall(MatStoreValues(aij->B));
2875   PetscFunctionReturn(PETSC_SUCCESS);
2876 }
2877 
2878 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2879 {
2880   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2881 
2882   PetscFunctionBegin;
2883   PetscCall(MatRetrieveValues(aij->A));
2884   PetscCall(MatRetrieveValues(aij->B));
2885   PetscFunctionReturn(PETSC_SUCCESS);
2886 }
2887 
2888 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2889 {
2890   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2891   PetscMPIInt size;
2892 
2893   PetscFunctionBegin;
2894   if (B->hash_active) {
2895     B->ops[0]      = b->cops;
2896     B->hash_active = PETSC_FALSE;
2897   }
2898   PetscCall(PetscLayoutSetUp(B->rmap));
2899   PetscCall(PetscLayoutSetUp(B->cmap));
2900 
2901 #if defined(PETSC_USE_CTABLE)
2902   PetscCall(PetscHMapIDestroy(&b->colmap));
2903 #else
2904   PetscCall(PetscFree(b->colmap));
2905 #endif
2906   PetscCall(PetscFree(b->garray));
2907   PetscCall(VecDestroy(&b->lvec));
2908   PetscCall(VecScatterDestroy(&b->Mvctx));
2909 
2910   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2911 
2912   MatSeqXAIJGetOptions_Private(b->B);
2913   PetscCall(MatDestroy(&b->B));
2914   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2915   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2916   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2917   PetscCall(MatSetType(b->B, MATSEQAIJ));
2918   MatSeqXAIJRestoreOptions_Private(b->B);
2919 
2920   MatSeqXAIJGetOptions_Private(b->A);
2921   PetscCall(MatDestroy(&b->A));
2922   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2923   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2924   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2925   PetscCall(MatSetType(b->A, MATSEQAIJ));
2926   MatSeqXAIJRestoreOptions_Private(b->A);
2927 
2928   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2929   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2930   B->preallocated  = PETSC_TRUE;
2931   B->was_assembled = PETSC_FALSE;
2932   B->assembled     = PETSC_FALSE;
2933   PetscFunctionReturn(PETSC_SUCCESS);
2934 }
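
/*
  A minimal preallocation sketch (illustrative only; B is assumed to be a MATMPIAIJ matrix whose
  layouts have been set with MatSetSizes()): reserve roughly 5 nonzeros per row in the diagonal
  block and 2 per row in the off-diagonal block, which is what the routine above implements when
  reached through MatMPIAIJSetPreallocation().

    PetscCall(MatMPIAIJSetPreallocation(B, 5, NULL, 2, NULL));
    // or pass exact per-row counts d_nnz[]/o_nnz[] of length B->rmap->n instead of the scalars
*/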
2935 
2936 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2937 {
2938   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2939   PetscBool   ondiagreset, offdiagreset, memoryreset;
2940 
2941   PetscFunctionBegin;
2942   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2943   PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
2944   if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);
2945 
2946   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
2947   PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
2948   memoryreset = (PetscBool)(ondiagreset || offdiagreset);
2949   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
2950   if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);
2951 
2952   PetscCall(PetscLayoutSetUp(B->rmap));
2953   PetscCall(PetscLayoutSetUp(B->cmap));
2954   PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
2955   PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
2956   PetscCall(VecScatterDestroy(&b->Mvctx));
2957 
2958   B->preallocated  = PETSC_TRUE;
2959   B->was_assembled = PETSC_FALSE;
2960   B->assembled     = PETSC_FALSE;
2961   /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
2962   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2963   PetscFunctionReturn(PETSC_SUCCESS);
2964 }
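
/*
  A minimal usage sketch (illustrative only; B is assumed to be an assembled MATMPIAIJ matrix):
  the routine above is reached through MatResetPreallocation(), which returns B to its original
  preallocated nonzero pattern so that values can be inserted again from scratch.

    PetscCall(MatResetPreallocation(B));
    // ... reinsert values with MatSetValues() ...
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
*/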
2965 
2966 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2967 {
2968   Mat         mat;
2969   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2970 
2971   PetscFunctionBegin;
2972   *newmat = NULL;
2973   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2974   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2975   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2976   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2977   a = (Mat_MPIAIJ *)mat->data;
2978 
2979   mat->factortype = matin->factortype;
2980   mat->assembled  = matin->assembled;
2981   mat->insertmode = NOT_SET_VALUES;
2982 
2983   a->size         = oldmat->size;
2984   a->rank         = oldmat->rank;
2985   a->donotstash   = oldmat->donotstash;
2986   a->roworiented  = oldmat->roworiented;
2987   a->rowindices   = NULL;
2988   a->rowvalues    = NULL;
2989   a->getrowactive = PETSC_FALSE;
2990 
2991   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2992   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2993   if (matin->hash_active) {
2994     PetscCall(MatSetUp(mat));
2995   } else {
2996     mat->preallocated = matin->preallocated;
2997     if (oldmat->colmap) {
2998 #if defined(PETSC_USE_CTABLE)
2999       PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3000 #else
3001       PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3002       PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3003 #endif
3004     } else a->colmap = NULL;
3005     if (oldmat->garray) {
3006       PetscInt len;
3007       len = oldmat->B->cmap->n;
3008       PetscCall(PetscMalloc1(len + 1, &a->garray));
3009       if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3010     } else a->garray = NULL;
3011 
3012     /* MatDuplicate() may be called with a non-assembled matrix since it only requires
3013       the matrix to be preallocated; this can happen, for example, inside a
3014       DMCreateMatrix_Shell */
3015     if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3016     if (oldmat->Mvctx) {
3017       a->Mvctx = oldmat->Mvctx;
3018       PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
3019     }
3020     PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3021     PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3022   }
3023   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3024   *newmat = mat;
3025   PetscFunctionReturn(PETSC_SUCCESS);
3026 }
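
/*
  A minimal usage sketch (illustrative only; A is assumed to be an assembled MATMPIAIJ matrix):
  the routine above is reached through MatDuplicate().

    Mat Acopy;

    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &Acopy));   // or MAT_DO_NOT_COPY_VALUES / MAT_SHARE_NONZERO_PATTERN
    PetscCall(MatDestroy(&Acopy));
*/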
3027 
3028 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3029 {
3030   PetscBool isbinary, ishdf5;
3031 
3032   PetscFunctionBegin;
3033   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3034   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3035   /* force binary viewer to load .info file if it has not yet done so */
3036   PetscCall(PetscViewerSetUp(viewer));
3037   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3038   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3039   if (isbinary) {
3040     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3041   } else if (ishdf5) {
3042 #if defined(PETSC_HAVE_HDF5)
3043     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3044 #else
3045     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3046 #endif
3047   } else {
3048     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3049   }
3050   PetscFunctionReturn(PETSC_SUCCESS);
3051 }
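
/*
  A minimal load sketch (illustrative only; "matrix.dat" is a hypothetical file previously written
  with MatView() and a binary viewer): MatLoad_MPIAIJ() above is reached through MatLoad() once the
  matrix type has been set to MATMPIAIJ (or MATAIJ).

    Mat         A;
    PetscViewer viewer;

    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATMPIAIJ));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
*/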
3052 
3053 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3054 {
3055   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3056   PetscInt    *rowidxs, *colidxs;
3057   PetscScalar *matvals;
3058 
3059   PetscFunctionBegin;
3060   PetscCall(PetscViewerSetUp(viewer));
3061 
3062   /* read in matrix header */
3063   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3064   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3065   M  = header[1];
3066   N  = header[2];
3067   nz = header[3];
3068   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3069   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3070   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3071 
3072   /* set block sizes from the viewer's .info file */
3073   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3074   /* set global sizes if not set already */
3075   if (mat->rmap->N < 0) mat->rmap->N = M;
3076   if (mat->cmap->N < 0) mat->cmap->N = N;
3077   PetscCall(PetscLayoutSetUp(mat->rmap));
3078   PetscCall(PetscLayoutSetUp(mat->cmap));
3079 
3080   /* check if the matrix sizes are correct */
3081   PetscCall(MatGetSize(mat, &rows, &cols));
3082   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3083 
3084   /* read in row lengths and build row indices */
3085   PetscCall(MatGetLocalSize(mat, &m, NULL));
3086   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3087   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3088   rowidxs[0] = 0;
3089   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3090   if (nz != PETSC_INT_MAX) {
3091     PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3092     PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3093   }
3094 
3095   /* read in column indices and matrix values */
3096   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3097   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3098   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3099   /* store matrix indices and values */
3100   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3101   PetscCall(PetscFree(rowidxs));
3102   PetscCall(PetscFree2(colidxs, matvals));
3103   PetscFunctionReturn(PETSC_SUCCESS);
3104 }
3105 
3106 /* Not scalable because of ISAllGather() unless getting all columns. */
3107 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3108 {
3109   IS          iscol_local;
3110   PetscBool   isstride;
3111   PetscMPIInt gisstride = 0;
3112 
3113   PetscFunctionBegin;
3114   /* check if we are grabbing all columns */
3115   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3116 
3117   if (isstride) {
3118     PetscInt start, len, mstart, mlen;
3119     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3120     PetscCall(ISGetLocalSize(iscol, &len));
3121     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3122     if (mstart == start && mlen - mstart == len) gisstride = 1;
3123   }
3124 
3125   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3126   if (gisstride) {
3127     PetscInt N;
3128     PetscCall(MatGetSize(mat, NULL, &N));
3129     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3130     PetscCall(ISSetIdentity(iscol_local));
3131     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3132   } else {
3133     PetscInt cbs;
3134     PetscCall(ISGetBlockSize(iscol, &cbs));
3135     PetscCall(ISAllGather(iscol, &iscol_local));
3136     PetscCall(ISSetBlockSize(iscol_local, cbs));
3137   }
3138 
3139   *isseq = iscol_local;
3140   PetscFunctionReturn(PETSC_SUCCESS);
3141 }
3142 
3143 /*
3144  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3145  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3146 
3147  Input Parameters:
3148 +   mat - matrix
3149 +   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3150            i.e., mat->rstart <= isrow[i] < mat->rend
3151 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3152            i.e., mat->cstart <= iscol[i] < mat->cend
3153 
3154  Output Parameters:
3155 +   isrow_d - sequential row index set for retrieving mat->A
3156 .   iscol_d - sequential column index set for retrieving mat->A
3157 .   iscol_o - sequential column index set for retrieving mat->B
3158 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3159  */
3160 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
3161 {
3162   Vec             x, cmap;
3163   const PetscInt *is_idx;
3164   PetscScalar    *xarray, *cmaparray;
3165   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3166   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3167   Mat             B    = a->B;
3168   Vec             lvec = a->lvec, lcmap;
3169   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3170   MPI_Comm        comm;
3171   VecScatter      Mvctx = a->Mvctx;
3172 
3173   PetscFunctionBegin;
3174   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3175   PetscCall(ISGetLocalSize(iscol, &ncols));
3176 
3177   /* (1) iscol selects a subset of the columns of mat; pad it with '-1.' to form a full-length vector x */
3178   PetscCall(MatCreateVecs(mat, &x, NULL));
3179   PetscCall(VecSet(x, -1.0));
3180   PetscCall(VecDuplicate(x, &cmap));
3181   PetscCall(VecSet(cmap, -1.0));
3182 
3183   /* Get start indices */
3184   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3185   isstart -= ncols;
3186   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3187 
3188   PetscCall(ISGetIndices(iscol, &is_idx));
3189   PetscCall(VecGetArray(x, &xarray));
3190   PetscCall(VecGetArray(cmap, &cmaparray));
3191   PetscCall(PetscMalloc1(ncols, &idx));
3192   for (i = 0; i < ncols; i++) {
3193     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3194     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3195     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3196   }
3197   PetscCall(VecRestoreArray(x, &xarray));
3198   PetscCall(VecRestoreArray(cmap, &cmaparray));
3199   PetscCall(ISRestoreIndices(iscol, &is_idx));
3200 
3201   /* Get iscol_d */
3202   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3203   PetscCall(ISGetBlockSize(iscol, &i));
3204   PetscCall(ISSetBlockSize(*iscol_d, i));
3205 
3206   /* Get isrow_d */
3207   PetscCall(ISGetLocalSize(isrow, &m));
3208   rstart = mat->rmap->rstart;
3209   PetscCall(PetscMalloc1(m, &idx));
3210   PetscCall(ISGetIndices(isrow, &is_idx));
3211   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3212   PetscCall(ISRestoreIndices(isrow, &is_idx));
3213 
3214   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3215   PetscCall(ISGetBlockSize(isrow, &i));
3216   PetscCall(ISSetBlockSize(*isrow_d, i));
3217 
3218   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3219   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3220   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3221 
3222   PetscCall(VecDuplicate(lvec, &lcmap));
3223 
3224   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3225   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3226 
3227   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3228   /* off-process column indices */
3229   count = 0;
3230   PetscCall(PetscMalloc1(Bn, &idx));
3231   PetscCall(PetscMalloc1(Bn, &cmap1));
3232 
3233   PetscCall(VecGetArray(lvec, &xarray));
3234   PetscCall(VecGetArray(lcmap, &cmaparray));
3235   for (i = 0; i < Bn; i++) {
3236     if (PetscRealPart(xarray[i]) > -1.0) {
3237       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3238       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3239       count++;
3240     }
3241   }
3242   PetscCall(VecRestoreArray(lvec, &xarray));
3243   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3244 
3245   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3246   /* cannot ensure iscol_o has same blocksize as iscol! */
3247 
3248   PetscCall(PetscFree(idx));
3249   *garray = cmap1;
3250 
3251   PetscCall(VecDestroy(&x));
3252   PetscCall(VecDestroy(&cmap));
3253   PetscCall(VecDestroy(&lcmap));
3254   PetscFunctionReturn(PETSC_SUCCESS);
3255 }
3256 
3257 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3259 {
3260   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3261   Mat         M = NULL;
3262   MPI_Comm    comm;
3263   IS          iscol_d, isrow_d, iscol_o;
3264   Mat         Asub = NULL, Bsub = NULL;
3265   PetscInt    n, count, M_size, N_size;
3266 
3267   PetscFunctionBegin;
3268   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3269 
3270   if (call == MAT_REUSE_MATRIX) {
3271     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3272     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3273     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3274 
3275     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3276     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3277 
3278     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3279     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3280 
3281     /* Update diagonal and off-diagonal portions of submat */
3282     asub = (Mat_MPIAIJ *)(*submat)->data;
3283     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3284     PetscCall(ISGetLocalSize(iscol_o, &n));
3285     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3286     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3287     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3288 
3289   } else { /* call == MAT_INITIAL_MATRIX) */
3290     PetscInt *garray, *garray_compact;
3291     PetscInt  BsubN;
3292 
3293     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3294     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3295 
3296     /* Create local submatrices Asub and Bsub */
3297     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3298     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3299 
3300     // Compact garray so that it is not of size Bn
3301     PetscCall(ISGetSize(iscol_o, &count));
3302     PetscCall(PetscMalloc1(count, &garray_compact));
3303     PetscCall(PetscArraycpy(garray_compact, garray, count));
3304 
3305     /* Create submatrix M */
3306     PetscCall(ISGetSize(isrow, &M_size));
3307     PetscCall(ISGetSize(iscol, &N_size));
3308     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));
3309 
3310     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3311     asub = (Mat_MPIAIJ *)M->data;
3312 
3313     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3314     n = asub->B->cmap->N;
3315     if (BsubN > n) {
3316       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3317       const PetscInt *idx;
3318       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3319       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3320 
3321       PetscCall(PetscMalloc1(n, &idx_new));
3322       j = 0;
3323       PetscCall(ISGetIndices(iscol_o, &idx));
3324       for (i = 0; i < n; i++) {
3325         if (j >= BsubN) break;
3326         while (subgarray[i] > garray[j]) j++;
3327 
3328         PetscCheck(subgarray[i] == garray[j], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be less than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3329         idx_new[i] = idx[j++];
3330       }
3331       PetscCall(ISRestoreIndices(iscol_o, &idx));
3332 
3333       PetscCall(ISDestroy(&iscol_o));
3334       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3335 
3336     } else PetscCheck(BsubN >= n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3337 
3338     PetscCall(PetscFree(garray));
3339     *submat = M;
3340 
3341     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3342     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3343     PetscCall(ISDestroy(&isrow_d));
3344 
3345     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3346     PetscCall(ISDestroy(&iscol_d));
3347 
3348     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3349     PetscCall(ISDestroy(&iscol_o));
3350   }
3351   PetscFunctionReturn(PETSC_SUCCESS);
3352 }
3353 
3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3355 {
3356   IS        iscol_local = NULL, isrow_d;
3357   PetscInt  csize;
3358   PetscInt  n, i, j, start, end;
3359   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3360   MPI_Comm  comm;
3361 
3362   PetscFunctionBegin;
3363   /* If isrow has same processor distribution as mat,
3364      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3365   if (call == MAT_REUSE_MATRIX) {
3366     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3367     if (isrow_d) {
3368       sameRowDist  = PETSC_TRUE;
3369       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3370     } else {
3371       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3372       if (iscol_local) {
3373         sameRowDist  = PETSC_TRUE;
3374         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3375       }
3376     }
3377   } else {
3378     /* Check if isrow has same processor distribution as mat */
3379     sameDist[0] = PETSC_FALSE;
3380     PetscCall(ISGetLocalSize(isrow, &n));
3381     if (!n) {
3382       sameDist[0] = PETSC_TRUE;
3383     } else {
3384       PetscCall(ISGetMinMax(isrow, &i, &j));
3385       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3386       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3387     }
3388 
3389     /* Check if iscol has same processor distribution as mat */
3390     sameDist[1] = PETSC_FALSE;
3391     PetscCall(ISGetLocalSize(iscol, &n));
3392     if (!n) {
3393       sameDist[1] = PETSC_TRUE;
3394     } else {
3395       PetscCall(ISGetMinMax(iscol, &i, &j));
3396       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3397       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3398     }
3399 
3400     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3401     PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3402     sameRowDist = tsameDist[0];
3403   }
3404 
3405   if (sameRowDist) {
3406     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3407       /* isrow and iscol have same processor distribution as mat */
3408       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3409       PetscFunctionReturn(PETSC_SUCCESS);
3410     } else { /* sameRowDist */
3411       /* isrow has same processor distribution as mat */
3412       if (call == MAT_INITIAL_MATRIX) {
3413         PetscBool sorted;
3414         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3415         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3416         PetscCall(ISGetSize(iscol, &i));
3417         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3418 
3419         PetscCall(ISSorted(iscol_local, &sorted));
3420         if (sorted) {
3421           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3422           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3423           PetscFunctionReturn(PETSC_SUCCESS);
3424         }
3425       } else { /* call == MAT_REUSE_MATRIX */
3426         IS iscol_sub;
3427         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3428         if (iscol_sub) {
3429           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3430           PetscFunctionReturn(PETSC_SUCCESS);
3431         }
3432       }
3433     }
3434   }
3435 
3436   /* General case: iscol -> iscol_local which has global size of iscol */
3437   if (call == MAT_REUSE_MATRIX) {
3438     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3439     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3440   } else {
3441     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3442   }
3443 
3444   PetscCall(ISGetLocalSize(iscol, &csize));
3445   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3446 
3447   if (call == MAT_INITIAL_MATRIX) {
3448     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3449     PetscCall(ISDestroy(&iscol_local));
3450   }
3451   PetscFunctionReturn(PETSC_SUCCESS);
3452 }
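
/*
  A minimal usage sketch (illustrative only; A is assumed to be an assembled, square MATMPIAIJ
  matrix with matching row and column ownership, and the first half of each rank's rows/columns
  is extracted): MatCreateSubMatrix_MPIAIJ() above is reached through MatCreateSubMatrix().

    IS       isrow, iscol;
    Mat      sub;
    PetscInt rstart, rend;

    PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
    PetscCall(ISCreateStride(PETSC_COMM_WORLD, (rend - rstart) / 2, rstart, 1, &isrow));
    PetscCall(ISCreateStride(PETSC_COMM_WORLD, (rend - rstart) / 2, rstart, 1, &iscol));
    PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &sub));

  Calling MatCreateSubMatrix() again with MAT_REUSE_MATRIX and the same index sets updates sub in
  place, which is the path exercised by the MAT_REUSE_MATRIX branches above.
*/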
3453 
3454 /*@C
3455   MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3456   and "off-diagonal" part of the matrix in CSR format.
3457 
3458   Collective
3459 
3460   Input Parameters:
3461 + comm   - MPI communicator
3462 . M      - the global row size
3463 . N      - the global column size
3464 . A      - "diagonal" portion of matrix
3465 . B      - the "off-diagonal" portion of the matrix; if garray is `NULL`, B should use global column ids and have N columns, otherwise B should use local column ids and have as many columns as entries in garray
3466 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter.
3467 
3468   Output Parameter:
3469 . mat - the matrix, with input `A` as its local diagonal matrix
3470 
3471   Level: advanced
3472 
3473   Notes:
3474   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3475 
3476   `A` and `B` become part of the output `mat`; the user cannot use `A` and `B` anymore.
3477 
3478   If `garray` is `NULL`, `B` will be compacted to use local indices, so `B`'s sparsity pattern (nonzerostate) will change. If `B` is a device matrix, its copy on the device must also be updated;
3479   we do so by increasing `B`'s nonzerostate. The next time `B` is used on the device, device matrix types should detect this change (see the internal routines `MatSeqAIJCUSPARSECopyToGPU()` and
3480   `MatAssemblyEnd_SeqAIJKokkos()`) and simply destroy and then recreate the device copy of `B`. This is not optimal, but it is easy to implement and not too hacky. To avoid this overhead, compute `garray`
3481   yourself; see the algorithms in the private function `MatSetUpMultiply_MPIAIJ()`.
3482 
3483   Whether `garray` is `NULL` need not be collective; in other words, `garray` can be `NULL` on some processes while not on others.
3484 
3485 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3486 @*/
3487 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
3488 {
3489   PetscInt    m, n;
3490   MatType     mpi_mat_type;
3491   Mat_MPIAIJ *mpiaij;
3492   Mat         C;
3493 
3494   PetscFunctionBegin;
3495   PetscCall(MatCreate(comm, &C));
3496   PetscCall(MatGetSize(A, &m, &n));
3497   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3498   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3499 
3500   PetscCall(MatSetSizes(C, m, n, M, N));
3501   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3502   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3503   PetscCall(MatSetType(C, mpi_mat_type));
3504   if (!garray) {
3505     const PetscScalar *ba;
3506 
3507     B->nonzerostate++;
3508     PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */
3509     PetscCall(MatSeqAIJRestoreArrayRead(B, &ba));
3510   }
3511 
3512   PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs));
3513   PetscCall(PetscLayoutSetUp(C->rmap));
3514   PetscCall(PetscLayoutSetUp(C->cmap));
3515 
3516   mpiaij              = (Mat_MPIAIJ *)C->data;
3517   mpiaij->A           = A;
3518   mpiaij->B           = B;
3519   mpiaij->garray      = garray;
3520   C->preallocated     = PETSC_TRUE;
3521   C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */
3522 
3523   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3524   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
3525   /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and
3526    also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
3527    */
3528   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
3529   PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3530   PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3531   *mat = C;
3532   PetscFunctionReturn(PETSC_SUCCESS);
3533 }
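
/*
  A minimal usage sketch (illustrative only): Adiag and Boffd are hypothetical MATSEQAIJ matrices
  already built on each rank, with Boffd using global column indices, so garray is passed as NULL
  and the routine compacts Boffd itself (see the Notes in the manual page above). M and N are the
  global row and column sizes.

    Mat C;

    PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, M, N, Adiag, Boffd, NULL, &C));
    // Adiag and Boffd now belong to C and must not be used (or destroyed) by the caller
*/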
3534 
3535 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3536 
3537 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3538 {
3539   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3540   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3541   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3542   Mat             M, Msub, B = a->B;
3543   MatScalar      *aa;
3544   Mat_SeqAIJ     *aij;
3545   PetscInt       *garray = a->garray, *colsub, Ncols;
3546   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3547   IS              iscol_sub, iscmap;
3548   const PetscInt *is_idx, *cmap;
3549   PetscBool       allcolumns = PETSC_FALSE;
3550   MPI_Comm        comm;
3551 
3552   PetscFunctionBegin;
3553   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3554   if (call == MAT_REUSE_MATRIX) {
3555     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3556     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3557     PetscCall(ISGetLocalSize(iscol_sub, &count));
3558 
3559     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3560     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3561 
3562     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3563     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3564 
3565     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3566 
3567   } else { /* call == MAT_INITIAL_MATRIX) */
3568     PetscBool flg;
3569 
3570     PetscCall(ISGetLocalSize(iscol, &n));
3571     PetscCall(ISGetSize(iscol, &Ncols));
3572 
3573     /* (1) iscol -> nonscalable iscol_local */
3574     /* Check for special case: each processor gets entire matrix columns */
3575     PetscCall(ISIdentity(iscol_local, &flg));
3576     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3577     PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3578     if (allcolumns) {
3579       iscol_sub = iscol_local;
3580       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3581       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3582 
3583     } else {
3584       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires that iscol_local be sorted; it may contain duplicate indices */
3585       PetscInt *idx, *cmap1, k;
3586       PetscCall(PetscMalloc1(Ncols, &idx));
3587       PetscCall(PetscMalloc1(Ncols, &cmap1));
3588       PetscCall(ISGetIndices(iscol_local, &is_idx));
3589       count = 0;
3590       k     = 0;
3591       for (i = 0; i < Ncols; i++) {
3592         j = is_idx[i];
3593         if (j >= cstart && j < cend) {
3594           /* diagonal part of mat */
3595           idx[count]     = j;
3596           cmap1[count++] = i; /* column index in submat */
3597         } else if (Bn) {
3598           /* off-diagonal part of mat */
3599           if (j == garray[k]) {
3600             idx[count]     = j;
3601             cmap1[count++] = i; /* column index in submat */
3602           } else if (j > garray[k]) {
3603             while (j > garray[k] && k < Bn - 1) k++;
3604             if (j == garray[k]) {
3605               idx[count]     = j;
3606               cmap1[count++] = i; /* column index in submat */
3607             }
3608           }
3609         }
3610       }
3611       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3612 
3613       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3614       PetscCall(ISGetBlockSize(iscol, &cbs));
3615       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3616 
3617       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3618     }
3619 
3620     /* (3) Create sequential Msub */
3621     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3622   }
3623 
3624   PetscCall(ISGetLocalSize(iscol_sub, &count));
3625   aij = (Mat_SeqAIJ *)Msub->data;
3626   ii  = aij->i;
3627   PetscCall(ISGetIndices(iscmap, &cmap));
3628 
3629   /*
3630       m - number of local rows
3631       Ncols - number of columns (same on all processors)
3632       rstart - first row in new global matrix generated
3633   */
3634   PetscCall(MatGetSize(Msub, &m, NULL));
3635 
3636   if (call == MAT_INITIAL_MATRIX) {
3637     /* (4) Create parallel newmat */
3638     PetscMPIInt rank, size;
3639     PetscInt    csize;
3640 
3641     PetscCallMPI(MPI_Comm_size(comm, &size));
3642     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3643 
3644     /*
3645         Determine the number of non-zeros in the diagonal and off-diagonal
3646         portions of the matrix in order to do correct preallocation
3647     */
3648 
3649     /* first get start and end of "diagonal" columns */
3650     PetscCall(ISGetLocalSize(iscol, &csize));
3651     if (csize == PETSC_DECIDE) {
3652       PetscCall(ISGetSize(isrow, &mglobal));
3653       if (mglobal == Ncols) { /* square matrix */
3654         nlocal = m;
3655       } else {
3656         nlocal = Ncols / size + ((Ncols % size) > rank);
3657       }
3658     } else {
3659       nlocal = csize;
3660     }
3661     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3662     rstart = rend - nlocal;
3663     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3664 
3665     /* next, compute all the lengths */
3666     jj = aij->j;
3667     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3668     olens = dlens + m;
3669     for (i = 0; i < m; i++) {
3670       jend = ii[i + 1] - ii[i];
3671       olen = 0;
3672       dlen = 0;
3673       for (j = 0; j < jend; j++) {
3674         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3675         else dlen++;
3676         jj++;
3677       }
3678       olens[i] = olen;
3679       dlens[i] = dlen;
3680     }
3681 
3682     PetscCall(ISGetBlockSize(isrow, &bs));
3683     PetscCall(ISGetBlockSize(iscol, &cbs));
3684 
3685     PetscCall(MatCreate(comm, &M));
3686     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3687     PetscCall(MatSetBlockSizes(M, bs, cbs));
3688     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3689     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3690     PetscCall(PetscFree(dlens));
3691 
3692   } else { /* call == MAT_REUSE_MATRIX */
3693     M = *newmat;
3694     PetscCall(MatGetLocalSize(M, &i, NULL));
3695     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3696     PetscCall(MatZeroEntries(M));
3697     /*
3698          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3699        rather than the slower MatSetValues().
3700     */
3701     M->was_assembled = PETSC_TRUE;
3702     M->assembled     = PETSC_FALSE;
3703   }
3704 
3705   /* (5) Set values of Msub to *newmat */
3706   PetscCall(PetscMalloc1(count, &colsub));
3707   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3708 
3709   jj = aij->j;
3710   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3711   for (i = 0; i < m; i++) {
3712     row = rstart + i;
3713     nz  = ii[i + 1] - ii[i];
3714     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3715     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3716     jj += nz;
3717     aa += nz;
3718   }
3719   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3720   PetscCall(ISRestoreIndices(iscmap, &cmap));
3721 
3722   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3723   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3724 
3725   PetscCall(PetscFree(colsub));
3726 
3727   /* save Msub, iscol_sub and iscmap used in processor for next request */
3728   if (call == MAT_INITIAL_MATRIX) {
3729     *newmat = M;
3730     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
3731     PetscCall(MatDestroy(&Msub));
3732 
3733     PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
3734     PetscCall(ISDestroy(&iscol_sub));
3735 
3736     PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
3737     PetscCall(ISDestroy(&iscmap));
3738 
3739     if (iscol_local) {
3740       PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3741       PetscCall(ISDestroy(&iscol_local));
3742     }
3743   }
3744   PetscFunctionReturn(PETSC_SUCCESS);
3745 }
3746 
3747 /*
3748     Not great since it makes two copies of the submatrix: first a SeqAIJ on each process,
3749   and then the final result by concatenating the local matrices.
3750   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3751 
3752   This requires a sequential iscol with all indices.
3753 */
3754 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3755 {
3756   PetscMPIInt rank, size;
3757   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3758   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3759   Mat         M, Mreuse;
3760   MatScalar  *aa, *vwork;
3761   MPI_Comm    comm;
3762   Mat_SeqAIJ *aij;
3763   PetscBool   colflag, allcolumns = PETSC_FALSE;
3764 
3765   PetscFunctionBegin;
3766   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3767   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3768   PetscCallMPI(MPI_Comm_size(comm, &size));
3769 
3770   /* Check for special case: each processor gets entire matrix columns */
3771   PetscCall(ISIdentity(iscol, &colflag));
3772   PetscCall(ISGetLocalSize(iscol, &n));
3773   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3774   PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3775 
3776   if (call == MAT_REUSE_MATRIX) {
3777     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3778     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3779     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3780   } else {
3781     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3782   }
3783 
3784   /*
3785       m - number of local rows
3786       n - number of columns (same on all processors)
3787       rstart - first row in new global matrix generated
3788   */
3789   PetscCall(MatGetSize(Mreuse, &m, &n));
3790   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3791   if (call == MAT_INITIAL_MATRIX) {
3792     aij = (Mat_SeqAIJ *)Mreuse->data;
3793     ii  = aij->i;
3794     jj  = aij->j;
3795 
3796     /*
3797         Determine the number of non-zeros in the diagonal and off-diagonal
3798         portions of the matrix in order to do correct preallocation
3799     */
3800 
3801     /* first get start and end of "diagonal" columns */
3802     if (csize == PETSC_DECIDE) {
3803       PetscCall(ISGetSize(isrow, &mglobal));
3804       if (mglobal == n) { /* square matrix */
3805         nlocal = m;
3806       } else {
3807         nlocal = n / size + ((n % size) > rank);
3808       }
3809     } else {
3810       nlocal = csize;
3811     }
3812     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3813     rstart = rend - nlocal;
3814     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3815 
3816     /* next, compute all the lengths */
3817     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3818     olens = dlens + m;
3819     for (i = 0; i < m; i++) {
3820       jend = ii[i + 1] - ii[i];
3821       olen = 0;
3822       dlen = 0;
3823       for (j = 0; j < jend; j++) {
3824         if (*jj < rstart || *jj >= rend) olen++;
3825         else dlen++;
3826         jj++;
3827       }
3828       olens[i] = olen;
3829       dlens[i] = dlen;
3830     }
3831     PetscCall(MatCreate(comm, &M));
3832     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3833     PetscCall(MatSetBlockSizes(M, bs, cbs));
3834     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3835     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3836     PetscCall(PetscFree(dlens));
3837   } else {
3838     PetscInt ml, nl;
3839 
3840     M = *newmat;
3841     PetscCall(MatGetLocalSize(M, &ml, &nl));
3842     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3843     PetscCall(MatZeroEntries(M));
3844     /*
3845          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3846        rather than the slower MatSetValues().
3847     */
3848     M->was_assembled = PETSC_TRUE;
3849     M->assembled     = PETSC_FALSE;
3850   }
3851   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3852   aij = (Mat_SeqAIJ *)Mreuse->data;
3853   ii  = aij->i;
3854   jj  = aij->j;
3855 
3856   /* trigger copy to CPU if needed */
3857   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3858   for (i = 0; i < m; i++) {
3859     row   = rstart + i;
3860     nz    = ii[i + 1] - ii[i];
3861     cwork = jj;
3862     jj    = PetscSafePointerPlusOffset(jj, nz);
3863     vwork = aa;
3864     aa    = PetscSafePointerPlusOffset(aa, nz);
3865     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3866   }
3867   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3868 
3869   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3870   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3871   *newmat = M;
3872 
3873   /* save submatrix used in processor for next request */
3874   if (call == MAT_INITIAL_MATRIX) {
3875     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3876     PetscCall(MatDestroy(&Mreuse));
3877   }
3878   PetscFunctionReturn(PETSC_SUCCESS);
3879 }
3880 
3881 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3882 {
3883   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3884   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
3885   const PetscInt *JJ;
3886   PetscBool       nooffprocentries;
3887   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3888 
3889   PetscFunctionBegin;
3890   PetscCall(PetscLayoutSetUp(B->rmap));
3891   PetscCall(PetscLayoutSetUp(B->cmap));
3892   m       = B->rmap->n;
3893   cstart  = B->cmap->rstart;
3894   cend    = B->cmap->rend;
3895   rstart  = B->rmap->rstart;
3896   irstart = Ii[0];
3897 
3898   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3899 
3900   if (PetscDefined(USE_DEBUG)) {
3901     for (i = 0; i < m; i++) {
3902       nnz = Ii[i + 1] - Ii[i];
3903       JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3904       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3905       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3906       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3907     }
3908   }
3909 
3910   for (i = 0; i < m; i++) {
3911     nnz     = Ii[i + 1] - Ii[i];
3912     JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
3913     nnz_max = PetscMax(nnz_max, nnz);
3914     d       = 0;
3915     for (j = 0; j < nnz; j++) {
3916       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3917     }
3918     d_nnz[i] = d;
3919     o_nnz[i] = nnz - d;
3920   }
3921   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3922   PetscCall(PetscFree2(d_nnz, o_nnz));
3923 
3924   for (i = 0; i < m; i++) {
3925     ii = i + rstart;
3926     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
3927   }
3928   nooffprocentries    = B->nooffprocentries;
3929   B->nooffprocentries = PETSC_TRUE;
3930   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3931   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3932   B->nooffprocentries = nooffprocentries;
3933 
3934   /* count number of entries below block diagonal */
3935   PetscCall(PetscFree(Aij->ld));
3936   PetscCall(PetscCalloc1(m, &ld));
3937   Aij->ld = ld;
3938   for (i = 0; i < m; i++) {
3939     nnz = Ii[i + 1] - Ii[i];
3940     j   = 0;
3941     while (j < nnz && J[j] < cstart) j++;
3942     ld[i] = j;
3943     if (J) J += nnz;
3944   }
3945 
3946   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3947   PetscFunctionReturn(PETSC_SUCCESS);
3948 }
3949 
3950 /*@
3951   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3952   (the default parallel PETSc format).
3953 
3954   Collective
3955 
3956   Input Parameters:
3957 + B - the matrix
3958 . i - the indices into `j` for the start of each local row (indices start with zero)
3959 . j - the column indices for each local row (indices start with zero)
3960 - v - optional values in the matrix
3961 
3962   Level: developer
3963 
3964   Notes:
3965   The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3966   thus you CANNOT change the matrix entries by changing the values of `v` after you have
3967   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3968 
3969   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
3970 
3971   A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.
3972 
3973   You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.
3974 
3975   If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
3976   `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.
3977 
3978   The format which is used for the sparse matrix input is equivalent to a
3979   row-major ordering, i.e., for the following matrix, the input data expected is
3980   as shown
3981 .vb
3982         1 0 0
3983         2 0 3     P0
3984        -------
3985         4 5 6     P1
3986 
3987      Process0 [P0] rows_owned=[0,1]
3988         i =  {0,1,3}  [size = nrow+1  = 2+1]
3989         j =  {0,0,2}  [size = 3]
3990         v =  {1,2,3}  [size = 3]
3991 
3992      Process1 [P1] rows_owned=[2]
3993         i =  {0,3}    [size = nrow+1  = 1+1]
3994         j =  {0,1,2}  [size = 3]
3995         v =  {4,5,6}  [size = 3]
3996 .ve
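
  As a rough sketch (not a complete program; error checking is omitted and the arrays are the
  Process0 [P0] data from the example above), the call sequence on that process might look like
.vb
  Mat         B;
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 2};
  PetscScalar v[] = {1., 2., 3.};

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 2, PETSC_DECIDE, 3, 3);  /* this process owns rows 0 and 1 */
  MatSetType(B, MATMPIAIJ);
  MatMPIAIJSetPreallocationCSR(B, i, j, v);
.ve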
3997 
3998 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
3999           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4000 @*/
4001 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4002 {
4003   PetscFunctionBegin;
4004   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4005   PetscFunctionReturn(PETSC_SUCCESS);
4006 }
4007 
4008 /*@
4009   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4010   (the default parallel PETSc format).  For good matrix assembly performance
4011   the user should preallocate the matrix storage by setting the parameters
4012   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4013 
4014   Collective
4015 
4016   Input Parameters:
4017 + B     - the matrix
4018 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4019            (same value is used for all local rows)
4020 . d_nnz - array containing the number of nonzeros in the various rows of the
4021            DIAGONAL portion of the local submatrix (possibly different for each row)
4022            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4023            The size of this array is equal to the number of local rows, i.e 'm'.
4024            For matrices that will be factored, you must leave room for (and set)
4025            the diagonal entry even if it is zero.
4026 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4027            submatrix (same value is used for all local rows).
4028 - o_nnz - array containing the number of nonzeros in the various rows of the
4029            OFF-DIAGONAL portion of the local submatrix (possibly different for
4030            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4031            structure. The size of this array is equal to the number
4032            of local rows, i.e 'm'.
4033 
4034   Example Usage:
4035   Consider the following 8x8 matrix with 34 non-zero values that is
4036   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4037   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4038   as follows
4039 
4040 .vb
4041             1  2  0  |  0  3  0  |  0  4
4042     Proc0   0  5  6  |  7  0  0  |  8  0
4043             9  0 10  | 11  0  0  | 12  0
4044     -------------------------------------
4045            13  0 14  | 15 16 17  |  0  0
4046     Proc1   0 18  0  | 19 20 21  |  0  0
4047             0  0  0  | 22 23  0  | 24  0
4048     -------------------------------------
4049     Proc2  25 26 27  |  0  0 28  | 29  0
4050            30  0  0  | 31 32 33  |  0 34
4051 .ve
4052 
4053   This can be represented as a collection of submatrices as
4054 .vb
4055       A B C
4056       D E F
4057       G H I
4058 .ve
4059 
4060   Where the submatrices A,B,C are owned by proc0, D,E,F are
4061   owned by proc1, G,H,I are owned by proc2.
4062 
4063   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4064   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4065   The 'M','N' parameters are 8,8, and have the same values on all procs.
4066 
4067   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4068   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4069   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4070   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4071   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4072   matrix, and [DF] as another `MATSEQAIJ` matrix.
4073 
4074   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4075   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4076   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4077   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4078   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4079   In this case, the values of `d_nz`, `o_nz` are
4080 .vb
4081      proc0  dnz = 2, o_nz = 2
4082      proc1  dnz = 3, o_nz = 2
4083      proc2  dnz = 1, o_nz = 4
4084 .ve
4085   We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4086   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4087   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4088   34 values.
4089 
4090   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4091   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4092   In the above case the values for `d_nnz`, `o_nnz` are
4093 .vb
4094      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4095      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4096      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4097 .ve
4098   Here the space allocated is the sum of all the above values, i.e., 34, and
4099   hence the preallocation is perfect.
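
  As a rough sketch (error checking omitted; `B` is assumed to already have its sizes and type
  `MATMPIAIJ` set, and the arrays are the proc1 per-row counts from the example above), the
  preallocation call on that process might look like
.vb
  PetscInt d_nnz[] = {3, 3, 2};
  PetscInt o_nnz[] = {2, 1, 1};

  MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
.ve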
4100 
4101   Level: intermediate
4102 
4103   Notes:
4104   If the *_nnz parameter is given then the *_nz parameter is ignored
4105 
4106   The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4107   storage.  The stored row and column indices begin with zero.
4108   See [Sparse Matrices](sec_matsparse) for details.
4109 
4110   The parallel matrix is partitioned such that the first m0 rows belong to
4111   process 0, the next m1 rows belong to process 1, the next m2 rows belong
4112   to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4113 
4114   The DIAGONAL portion of the local submatrix of a processor can be defined
4115   as the submatrix which is obtained by extracting the part corresponding to
4116   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4117   first row that belongs to the processor, r2 is the last row belonging to
4118   this processor, and c1-c2 is the range of indices of the local part of a
4119   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4120   common case of a square matrix, the row and column ranges are the same and
4121   the DIAGONAL part is also square. The remaining portion of the local
4122   submatrix (mxN) constitutes the OFF-DIAGONAL portion.
4123 
4124   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4125 
4126   You can call `MatGetInfo()` to get information on how effective the preallocation was;
4127   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4128   You can also run with the option `-info` and look for messages with the string
4129   malloc in them to see if additional memory allocation was needed.
4130 
4131 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4132           `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4133 @*/
4134 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4135 {
4136   PetscFunctionBegin;
4137   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4138   PetscValidType(B, 1);
4139   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4140   PetscFunctionReturn(PETSC_SUCCESS);
4141 }
4142 
4143 /*@
4144   MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4145   CSR format.
4146 
4147   Collective
4148 
4149   Input Parameters:
4150 + comm - MPI communicator
4151 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
4152 . n    - This value should be the same as the local size used in creating the
4153          x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
4154          calculated if `N` is given) For square matrices n is almost always `m`.
4155 . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
4156 . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
4157 . i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4158 . j    - global column indices
4159 - a    - optional matrix values
4160 
4161   Output Parameter:
4162 . mat - the matrix
4163 
4164   Level: intermediate
4165 
4166   Notes:
4167   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4168   thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
4169   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4170 
4171   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
4172 
4173   Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`
4174 
4175   If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
4176   `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4177 
4178   The format which is used for the sparse matrix input, is equivalent to a
4179   row-major ordering, i.e., for the following matrix, the input data expected is
4180   as shown
4181 .vb
4182         1 0 0
4183         2 0 3     P0
4184        -------
4185         4 5 6     P1
4186 
4187      Process0 [P0] rows_owned=[0,1]
4188         i =  {0,1,3}  [size = nrow+1  = 2+1]
4189         j =  {0,0,2}  [size = 3]
4190         v =  {1,2,3}  [size = 3]
4191 
4192      Process1 [P1] rows_owned=[2]
4193         i =  {0,3}    [size = nrow+1  = 1+1]
4194         j =  {0,1,2}  [size = 3]
4195         v =  {4,5,6}  [size = 3]
4196 .ve
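
  A rough sketch of the corresponding call on Process0 [P0] from the example above (error
  checking omitted) might be
.vb
  Mat         A;
  PetscInt    i[] = {0, 1, 3};
  PetscInt    j[] = {0, 0, 2};
  PetscScalar v[] = {1., 2., 3.};

  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, 2, PETSC_DECIDE, 3, 3, i, j, v, &A);
.ve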
4197 
4198 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4199           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4200 @*/
4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4202 {
4203   PetscFunctionBegin;
4204   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4205   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4206   PetscCall(MatCreate(comm, mat));
4207   PetscCall(MatSetSizes(*mat, m, n, M, N));
4208   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4209   PetscCall(MatSetType(*mat, MATMPIAIJ));
4210   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4211   PetscFunctionReturn(PETSC_SUCCESS);
4212 }
4213 
4214 /*@
4215   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
4216   CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4217   to `MatCreateMPIAIJWithArrays()`
4218 
4219   Deprecated: Use `MatUpdateMPIAIJWithArray()`
4220 
4221   Collective
4222 
4223   Input Parameters:
4224 + mat - the matrix
4225 . m   - number of local rows (Cannot be `PETSC_DECIDE`)
4226 . n   - This value should be the same as the local size used in creating the
4227        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4228        calculated if N is given) For square matrices n is almost always m.
4229 . M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4230 . N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4231 . Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4232 . J   - column indices
4233 - v   - matrix values
4234 
4235   Level: deprecated
4236 
4237 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4238           `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4239 @*/
4240 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4241 {
4242   PetscInt        nnz, i;
4243   PetscBool       nooffprocentries;
4244   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4245   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4246   PetscScalar    *ad, *ao;
4247   PetscInt        ldi, Iii, md;
4248   const PetscInt *Adi = Ad->i;
4249   PetscInt       *ld  = Aij->ld;
4250 
4251   PetscFunctionBegin;
4252   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4253   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4254   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4255   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4256 
4257   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4258   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4259 
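  /* Each local row of v is laid out as [entries whose global column precedes the diagonal block | diagonal-block entries | entries after the diagonal block];
     ld[i] counts the entries before the diagonal block, so the three copies below split each row of v between B (off-diagonal) and A (diagonal) */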
4260   for (i = 0; i < m; i++) {
4261     if (PetscDefined(USE_DEBUG)) {
4262       for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
4263         PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
4264         PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
4265       }
4266     }
4267     nnz = Ii[i + 1] - Ii[i];
4268     Iii = Ii[i];
4269     ldi = ld[i];
4270     md  = Adi[i + 1] - Adi[i];
4271     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4272     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4273     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4274     ad += md;
4275     ao += nnz - md;
4276   }
4277   nooffprocentries      = mat->nooffprocentries;
4278   mat->nooffprocentries = PETSC_TRUE;
4279   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4280   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4281   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4282   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4283   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4284   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4285   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4286   mat->nooffprocentries = nooffprocentries;
4287   PetscFunctionReturn(PETSC_SUCCESS);
4288 }
4289 
4290 /*@
4291   MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4292 
4293   Collective
4294 
4295   Input Parameters:
4296 + mat - the matrix
4297 - v   - matrix values, stored by row
4298 
4299   Level: intermediate
4300 
4301   Notes:
4302   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4303 
4304   The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly
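
  A rough usage sketch (error checking omitted; `i`, `j`, and `v` are CSR arrays with sorted
  column indices as in the example in `MatCreateMPIAIJWithArrays()`, and `vnew` holds new values
  for the same nonzero pattern, stored by row) might be
.vb
  Mat A;

  MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD, m, PETSC_DECIDE, M, N, i, j, v, &A);
  MatUpdateMPIAIJWithArray(A, vnew);
.ve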
4305 
4306 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4307           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
4308 @*/
4309 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4310 {
4311   PetscInt        nnz, i, m;
4312   PetscBool       nooffprocentries;
4313   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4314   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4315   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4316   PetscScalar    *ad, *ao;
4317   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4318   PetscInt        ldi, Iii, md;
4319   PetscInt       *ld = Aij->ld;
4320 
4321   PetscFunctionBegin;
4322   m = mat->rmap->n;
4323 
4324   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4325   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4326   Iii = 0;
4327   for (i = 0; i < m; i++) {
4328     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4329     ldi = ld[i];
4330     md  = Adi[i + 1] - Adi[i];
4331     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4332     ad += md;
4333     if (ao) {
4334       PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4335       PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4336       ao += nnz - md;
4337     }
4338     Iii += nnz;
4339   }
4340   nooffprocentries      = mat->nooffprocentries;
4341   mat->nooffprocentries = PETSC_TRUE;
4342   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4343   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4344   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4345   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4346   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4347   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4348   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4349   mat->nooffprocentries = nooffprocentries;
4350   PetscFunctionReturn(PETSC_SUCCESS);
4351 }
4352 
4353 /*@
4354   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4355   (the default parallel PETSc format).  For good matrix assembly performance
4356   the user should preallocate the matrix storage by setting the parameters
4357   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4358 
4359   Collective
4360 
4361   Input Parameters:
4362 + comm  - MPI communicator
4363 . m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4364           This value should be the same as the local size used in creating the
4365           y vector for the matrix-vector product y = Ax.
4366 . n     - This value should be the same as the local size used in creating the
4367           x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4368           calculated if N is given) For square matrices n is almost always m.
4369 . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4370 . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4371 . d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4372           (same value is used for all local rows)
4373 . d_nnz - array containing the number of nonzeros in the various rows of the
4374           DIAGONAL portion of the local submatrix (possibly different for each row)
4375           or `NULL`, if `d_nz` is used to specify the nonzero structure.
4376           The size of this array is equal to the number of local rows, i.e., 'm'.
4377 . o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4378           submatrix (same value is used for all local rows).
4379 - o_nnz - array containing the number of nonzeros in the various rows of the
4380           OFF-DIAGONAL portion of the local submatrix (possibly different for
4381           each row) or `NULL`, if `o_nz` is used to specify the nonzero
4382           structure. The size of this array is equal to the number
4383           of local rows, i.e., 'm'.
4384 
4385   Output Parameter:
4386 . A - the matrix
4387 
4388   Options Database Keys:
4389 + -mat_no_inode                     - Do not use inodes
4390 . -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
4391 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4392                                       See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4393                                       to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4394 
4395   Level: intermediate
4396 
4397   Notes:
4398   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4399   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4400   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4401 
4402   If the *_nnz parameter is given then the *_nz parameter is ignored
4403 
4404   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4405   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4406   storage requirements for this matrix.
4407 
4408   If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4409   processor then it must be used on all processors that share the object for
4410   that argument.
4411 
4412   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4413   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4414 
4415   The user MUST specify either the local or global matrix dimensions
4416   (possibly both).
4417 
4418   The parallel matrix is partitioned across processors such that the
4419   first `m0` rows belong to process 0, the next `m1` rows belong to
4420   process 1, the next `m2` rows belong to process 2, etc., where
4421   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
4422   values corresponding to [m x N] submatrix.
4423 
4424   The columns are logically partitioned with the n0 columns belonging
4425   to the 0th partition, the next n1 columns belonging to the next
4426   partition, etc., where n0,n1,n2... are the input parameter 'n'.
4427 
4428   The DIAGONAL portion of the local submatrix on any given processor
4429   is the submatrix corresponding to the rows and columns m,n
4430   corresponding to the given processor, i.e., the diagonal matrix on
4431   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4432   etc. The remaining portion of the local submatrix [m x (N-n)]
4433   constitutes the OFF-DIAGONAL portion. The example below better
4434   illustrates this concept. The two matrices, the DIAGONAL portion and
4435   the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.
4436 
4437   For a square global matrix we define each processor's diagonal portion
4438   to be its local rows and the corresponding columns (a square submatrix);
4439   each processor's off-diagonal portion encompasses the remainder of the
4440   local matrix (a rectangular submatrix).
4441 
4442   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4443 
4444   When calling this routine with a single process communicator, a matrix of
4445   type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4446   type of communicator, use the construction mechanism
4447 .vb
4448   MatCreate(..., &A);
4449   MatSetType(A, MATMPIAIJ);
4450   MatSetSizes(A, m, n, M, N);
4451   MatMPIAIJSetPreallocation(A, ...);
4452 .ve
4453 
4454   By default, this format uses inodes (identical nodes) when possible.
4455   We search for consecutive rows with the same nonzero structure, thereby
4456   reusing matrix information to achieve increased efficiency.
4457 
4458   Example Usage:
4459   Consider the following 8x8 matrix with 34 non-zero values that is
4460   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4461   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4462   as follows
4463 
4464 .vb
4465             1  2  0  |  0  3  0  |  0  4
4466     Proc0   0  5  6  |  7  0  0  |  8  0
4467             9  0 10  | 11  0  0  | 12  0
4468     -------------------------------------
4469            13  0 14  | 15 16 17  |  0  0
4470     Proc1   0 18  0  | 19 20 21  |  0  0
4471             0  0  0  | 22 23  0  | 24  0
4472     -------------------------------------
4473     Proc2  25 26 27  |  0  0 28  | 29  0
4474            30  0  0  | 31 32 33  |  0 34
4475 .ve
4476 
4477   This can be represented as a collection of submatrices as
4478 
4479 .vb
4480       A B C
4481       D E F
4482       G H I
4483 .ve
4484 
4485   Where the submatrices A,B,C are owned by proc0, D,E,F are
4486   owned by proc1, G,H,I are owned by proc2.
4487 
4488   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4489   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4490   The 'M','N' parameters are 8,8, and have the same values on all procs.
4491 
4492   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4493   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4494   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4495   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4496   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4497   matrix, and [DF] as another `MATSEQAIJ` matrix.
4498 
4499   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4500   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
4501   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4502   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
4503   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4504   In this case, the values of `d_nz`,`o_nz` are
4505 .vb
4506      proc0  dnz = 2, o_nz = 2
4507      proc1  dnz = 3, o_nz = 2
4508      proc2  dnz = 1, o_nz = 4
4509 .ve
4510   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4511   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4512   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4513   34 values.
4514 
4515   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4516   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4517   In the above case the values for `d_nnz`, `o_nnz` are
4518 .vb
4519      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4520      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4521      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4522 .ve
4523   Here the space allocated is the sum of all the above values, i.e., 34, and
4524   hence the preallocation is perfect.
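
  As a rough sketch (error checking omitted; the local sizes and per-row arrays are the proc1
  values from the example above), the creation call on that process might look like
.vb
  Mat      A;
  PetscInt d_nnz[] = {3, 3, 2};
  PetscInt o_nnz[] = {2, 1, 1};

  MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve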
4525 
4526 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4527           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
4528           `MatGetOwnershipRangesColumn()`, `PetscLayout`
4529 @*/
4530 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4531 {
4532   PetscMPIInt size;
4533 
4534   PetscFunctionBegin;
4535   PetscCall(MatCreate(comm, A));
4536   PetscCall(MatSetSizes(*A, m, n, M, N));
4537   PetscCallMPI(MPI_Comm_size(comm, &size));
4538   if (size > 1) {
4539     PetscCall(MatSetType(*A, MATMPIAIJ));
4540     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4541   } else {
4542     PetscCall(MatSetType(*A, MATSEQAIJ));
4543     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4544   }
4545   PetscFunctionReturn(PETSC_SUCCESS);
4546 }
4547 
4548 /*@C
4549   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4550 
4551   Not Collective
4552 
4553   Input Parameter:
4554 . A - The `MATMPIAIJ` matrix
4555 
4556   Output Parameters:
4557 + Ad     - The local diagonal block as a `MATSEQAIJ` matrix
4558 . Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
4559 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4560 
4561   Level: intermediate
4562 
4563   Note:
4564   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4565   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4566   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4567   local column numbers to global column numbers in the original matrix.
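
  A rough sketch of mapping a local column index of `Ao` to a global column index (error checking
  omitted; `A` is assumed to be an assembled `MATMPIAIJ` matrix) might be
.vb
  Mat             Ad, Ao;
  const PetscInt *colmap;
  PetscInt        globalcol;

  MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
  globalcol = colmap[0];   /* global column number of local column 0 of Ao */
.ve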
4568 
4569 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
4570 @*/
4571 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4572 {
4573   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4574   PetscBool   flg;
4575 
4576   PetscFunctionBegin;
4577   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4578   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4579   if (Ad) *Ad = a->A;
4580   if (Ao) *Ao = a->B;
4581   if (colmap) *colmap = a->garray;
4582   PetscFunctionReturn(PETSC_SUCCESS);
4583 }
4584 
4585 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4586 {
4587   PetscInt     m, N, i, rstart, nnz, Ii;
4588   PetscInt    *indx;
4589   PetscScalar *values;
4590   MatType      rootType;
4591 
4592   PetscFunctionBegin;
4593   PetscCall(MatGetSize(inmat, &m, &N));
4594   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4595     PetscInt *dnz, *onz, sum, bs, cbs;
4596 
4597     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4598     /* Check sum(n) = N */
4599     PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4600     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4601 
4602     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4603     rstart -= m;
4604 
4605     MatPreallocateBegin(comm, m, n, dnz, onz);
4606     for (i = 0; i < m; i++) {
4607       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4608       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4609       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4610     }
4611 
4612     PetscCall(MatCreate(comm, outmat));
4613     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4614     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4615     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4616     PetscCall(MatGetRootType_Private(inmat, &rootType));
4617     PetscCall(MatSetType(*outmat, rootType));
4618     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4619     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4620     MatPreallocateEnd(dnz, onz);
4621     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4622   }
4623 
4624   /* numeric phase */
4625   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4626   for (i = 0; i < m; i++) {
4627     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4628     Ii = i + rstart;
4629     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4630     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4631   }
4632   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4633   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4634   PetscFunctionReturn(PETSC_SUCCESS);
4635 }
4636 
4637 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
4638 {
4639   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;
4640 
4641   PetscFunctionBegin;
4642   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4643   PetscCall(PetscFree(merge->id_r));
4644   PetscCall(PetscFree(merge->len_s));
4645   PetscCall(PetscFree(merge->len_r));
4646   PetscCall(PetscFree(merge->bi));
4647   PetscCall(PetscFree(merge->bj));
4648   PetscCall(PetscFree(merge->buf_ri[0]));
4649   PetscCall(PetscFree(merge->buf_ri));
4650   PetscCall(PetscFree(merge->buf_rj[0]));
4651   PetscCall(PetscFree(merge->buf_rj));
4652   PetscCall(PetscFree(merge->coi));
4653   PetscCall(PetscFree(merge->coj));
4654   PetscCall(PetscFree(merge->owners_co));
4655   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4656   PetscCall(PetscFree(merge));
4657   PetscFunctionReturn(PETSC_SUCCESS);
4658 }
4659 
4660 #include <../src/mat/utils/freespace.h>
4661 #include <petscbt.h>
4662 
4663 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4664 {
4665   MPI_Comm             comm;
4666   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4667   PetscMPIInt          size, rank, taga, *len_s;
4668   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
4669   PetscMPIInt          proc, k;
4670   PetscInt           **buf_ri, **buf_rj;
4671   PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4672   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4673   MPI_Request         *s_waits, *r_waits;
4674   MPI_Status          *status;
4675   const MatScalar     *aa, *a_a;
4676   MatScalar          **abuf_r, *ba_i;
4677   Mat_Merge_SeqsToMPI *merge;
4678   PetscContainer       container;
4679 
4680   PetscFunctionBegin;
4681   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4682   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4683 
4684   PetscCallMPI(MPI_Comm_size(comm, &size));
4685   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4686 
4687   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4688   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4689   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4690   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4691   aa = a_a;
4692 
4693   bi     = merge->bi;
4694   bj     = merge->bj;
4695   buf_ri = merge->buf_ri;
4696   buf_rj = merge->buf_rj;
4697 
4698   PetscCall(PetscMalloc1(size, &status));
4699   owners = merge->rowmap->range;
4700   len_s  = merge->len_s;
4701 
4702   /* send and recv matrix values */
4703   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4704   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4705 
4706   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4707   for (proc = 0, k = 0; proc < size; proc++) {
4708     if (!len_s[proc]) continue;
4709     i = owners[proc];
4710     PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4711     k++;
4712   }
4713 
4714   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4715   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4716   PetscCall(PetscFree(status));
4717 
4718   PetscCall(PetscFree(s_waits));
4719   PetscCall(PetscFree(r_waits));
4720 
4721   /* insert mat values of mpimat */
4722   PetscCall(PetscMalloc1(N, &ba_i));
4723   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4724 
4725   for (k = 0; k < merge->nrecv; k++) {
4726     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4727     nrows       = *buf_ri_k[k];
4728     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4729     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4730   }
4731 
4732   /* set values of ba */
4733   m = merge->rowmap->n;
4734   for (i = 0; i < m; i++) {
4735     arow = owners[rank] + i;
4736     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4737     bnzi = bi[i + 1] - bi[i];
4738     PetscCall(PetscArrayzero(ba_i, bnzi));
4739 
4740     /* add local non-zero vals of this proc's seqmat into ba */
4741     anzi   = ai[arow + 1] - ai[arow];
4742     aj     = a->j + ai[arow];
4743     aa     = a_a + ai[arow];
4744     nextaj = 0;
4745     for (j = 0; nextaj < anzi; j++) {
4746       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4747         ba_i[j] += aa[nextaj++];
4748       }
4749     }
4750 
4751     /* add received vals into ba */
4752     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4753       /* i-th row */
4754       if (i == *nextrow[k]) {
4755         anzi   = *(nextai[k] + 1) - *nextai[k];
4756         aj     = buf_rj[k] + *nextai[k];
4757         aa     = abuf_r[k] + *nextai[k];
4758         nextaj = 0;
4759         for (j = 0; nextaj < anzi; j++) {
4760           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4761             ba_i[j] += aa[nextaj++];
4762           }
4763         }
4764         nextrow[k]++;
4765         nextai[k]++;
4766       }
4767     }
4768     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4769   }
4770   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4771   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4772   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4773 
4774   PetscCall(PetscFree(abuf_r[0]));
4775   PetscCall(PetscFree(abuf_r));
4776   PetscCall(PetscFree(ba_i));
4777   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4778   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4779   PetscFunctionReturn(PETSC_SUCCESS);
4780 }
4781 
4782 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4783 {
4784   Mat                  B_mpi;
4785   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4786   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4787   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4788   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4789   PetscInt             len, *dnz, *onz, bs, cbs;
4790   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4791   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4792   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4793   MPI_Status          *status;
4794   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4795   PetscBT              lnkbt;
4796   Mat_Merge_SeqsToMPI *merge;
4797   PetscContainer       container;
4798 
4799   PetscFunctionBegin;
4800   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4801 
4802   /* make sure it is a PETSc comm */
4803   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4804   PetscCallMPI(MPI_Comm_size(comm, &size));
4805   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4806 
4807   PetscCall(PetscNew(&merge));
4808   PetscCall(PetscMalloc1(size, &status));
4809 
4810   /* determine row ownership */
4811   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4812   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4813   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4814   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4815   PetscCall(PetscLayoutSetUp(merge->rowmap));
4816   PetscCall(PetscMalloc1(size, &len_si));
4817   PetscCall(PetscMalloc1(size, &merge->len_s));
4818 
4819   m      = merge->rowmap->n;
4820   owners = merge->rowmap->range;
4821 
4822   /* determine the number of messages to send, their lengths */
4823   len_s = merge->len_s;
4824 
4825   len          = 0; /* length of buf_si[] */
4826   merge->nsend = 0;
4827   for (PetscMPIInt proc = 0; proc < size; proc++) {
4828     len_si[proc] = 0;
4829     if (proc == rank) {
4830       len_s[proc] = 0;
4831     } else {
4832       PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
4833       PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
4834     }
4835     if (len_s[proc]) {
4836       merge->nsend++;
4837       nrows = 0;
4838       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4839         if (ai[i + 1] > ai[i]) nrows++;
4840       }
4841       PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
4842       len += len_si[proc];
4843     }
4844   }
4845 
4846   /* determine the number and length of messages to receive for ij-structure */
4847   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4848   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4849 
4850   /* post the Irecv of j-structure */
4851   PetscCall(PetscCommGetNewTag(comm, &tagj));
4852   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4853 
4854   /* post the Isend of j-structure */
4855   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4856 
4857   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4858     if (!len_s[proc]) continue;
4859     i = owners[proc];
4860     PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4861     k++;
4862   }
4863 
4864   /* receives and sends of j-structure are complete */
4865   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4866   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4867 
4868   /* send and recv i-structure */
4869   PetscCall(PetscCommGetNewTag(comm, &tagi));
4870   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4871 
4872   PetscCall(PetscMalloc1(len + 1, &buf_s));
4873   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4874   for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
4875     if (!len_s[proc]) continue;
4876     /* form outgoing message for i-structure:
4877          buf_si[0]:                 nrows to be sent
4878                [1:nrows]:           row index (global)
4879                [nrows+1:2*nrows+1]: i-structure index
4880     */
4881     nrows       = len_si[proc] / 2 - 1;
4882     buf_si_i    = buf_si + nrows + 1;
4883     buf_si[0]   = nrows;
4884     buf_si_i[0] = 0;
4885     nrows       = 0;
4886     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4887       anzi = ai[i + 1] - ai[i];
4888       if (anzi) {
4889         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4890         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4891         nrows++;
4892       }
4893     }
4894     PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4895     k++;
4896     buf_si += len_si[proc];
4897   }
4898 
4899   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4900   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4901 
4902   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4903   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4904 
4905   PetscCall(PetscFree(len_si));
4906   PetscCall(PetscFree(len_ri));
4907   PetscCall(PetscFree(rj_waits));
4908   PetscCall(PetscFree2(si_waits, sj_waits));
4909   PetscCall(PetscFree(ri_waits));
4910   PetscCall(PetscFree(buf_s));
4911   PetscCall(PetscFree(status));
4912 
4913   /* compute a local seq matrix in each processor */
4914   /* allocate bi array and free space for accumulating nonzero column info */
4915   PetscCall(PetscMalloc1(m + 1, &bi));
4916   bi[0] = 0;
4917 
4918   /* create and initialize a linked list */
4919   nlnk = N + 1;
4920   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4921 
4922   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4923   len = ai[owners[rank + 1]] - ai[owners[rank]];
4924   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4925 
4926   current_space = free_space;
4927 
4928   /* determine symbolic info for each local row */
4929   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4930 
4931   for (k = 0; k < merge->nrecv; k++) {
4932     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4933     nrows       = *buf_ri_k[k];
4934     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4935     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4936   }
4937 
4938   MatPreallocateBegin(comm, m, n, dnz, onz);
4939   len = 0;
4940   for (i = 0; i < m; i++) {
4941     bnzi = 0;
4942     /* add local non-zero cols of this proc's seqmat into lnk */
4943     arow = owners[rank] + i;
4944     anzi = ai[arow + 1] - ai[arow];
4945     aj   = a->j + ai[arow];
4946     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4947     bnzi += nlnk;
4948     /* add received col data into lnk */
4949     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4950       if (i == *nextrow[k]) {            /* i-th row */
4951         anzi = *(nextai[k] + 1) - *nextai[k];
4952         aj   = buf_rj[k] + *nextai[k];
4953         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
4954         bnzi += nlnk;
4955         nextrow[k]++;
4956         nextai[k]++;
4957       }
4958     }
4959     if (len < bnzi) len = bnzi; /* =max(bnzi) */
4960 
4961     /* if free space is not available, make more free space */
4962     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
4963     /* copy data into free space, then initialize lnk */
4964     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
4965     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
4966 
4967     current_space->array += bnzi;
4968     current_space->local_used += bnzi;
4969     current_space->local_remaining -= bnzi;
4970 
4971     bi[i + 1] = bi[i] + bnzi;
4972   }
4973 
4974   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4975 
4976   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
4977   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4978   PetscCall(PetscLLDestroy(lnk, lnkbt));
4979 
4980   /* create symbolic parallel matrix B_mpi */
4981   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
4982   PetscCall(MatCreate(comm, &B_mpi));
4983   if (n == PETSC_DECIDE) {
4984     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
4985   } else {
4986     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4987   }
4988   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
4989   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
4990   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
4991   MatPreallocateEnd(dnz, onz);
4992   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
4993 
4994   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4995   B_mpi->assembled = PETSC_FALSE;
4996   merge->bi        = bi;
4997   merge->bj        = bj;
4998   merge->buf_ri    = buf_ri;
4999   merge->buf_rj    = buf_rj;
5000   merge->coi       = NULL;
5001   merge->coj       = NULL;
5002   merge->owners_co = NULL;
5003 
5004   PetscCall(PetscCommDestroy(&comm));
5005 
5006   /* attach the supporting struct to B_mpi for reuse */
5007   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5008   PetscCall(PetscContainerSetPointer(container, merge));
5009   PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5010   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5011   PetscCall(PetscContainerDestroy(&container));
5012   *mpimat = B_mpi;
5013 
5014   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5015   PetscFunctionReturn(PETSC_SUCCESS);
5016 }
5017 
5018 /*@
5019   MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5020   matrices from each processor
5021 
5022   Collective
5023 
5024   Input Parameters:
+ comm   - the communicator the parallel matrix will live on
. seqmat - the input sequential matrix on each MPI process
5027 . m      - number of local rows (or `PETSC_DECIDE`)
5028 . n      - number of local columns (or `PETSC_DECIDE`)
5029 - scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5030 
5031   Output Parameter:
5032 . mpimat - the parallel matrix generated
5033 
5034   Level: advanced
5035 
5036   Note:
  The dimensions of the sequential matrix on each process MUST be the same.
  The input `seqmat` is placed in the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.
5040 
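  Example Usage:
  A minimal sketch of the intended calling sequence; the per-process sequential matrix `seq` and the local sizes `m`, `n` are assumptions chosen by the caller:
.vb
  Mat      seq, par;
  PetscInt m, n; // local row and column sizes for the parallel matrix, chosen by the caller
  // ... each process assembles its own MATSEQAIJ contribution seq of identical global size ...
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seq, m, n, MAT_INITIAL_MATRIX, &par));
  // ... later, after changing the numerical values (but not the nonzero pattern) of seq ...
  PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, seq, m, n, MAT_REUSE_MATRIX, &par));
  PetscCall(MatDestroy(&par));
.ve
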
5041 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
5042 @*/
5043 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5044 {
5045   PetscMPIInt size;
5046 
5047   PetscFunctionBegin;
5048   PetscCallMPI(MPI_Comm_size(comm, &size));
5049   if (size == 1) {
5050     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5051     if (scall == MAT_INITIAL_MATRIX) {
5052       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5053     } else {
5054       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5055     }
5056     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5057     PetscFunctionReturn(PETSC_SUCCESS);
5058   }
5059   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5060   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5061   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5062   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5063   PetscFunctionReturn(PETSC_SUCCESS);
5064 }
5065 
5066 /*@
5067   MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.
5068 
5069   Not Collective
5070 
5071   Input Parameter:
5072 . A - the matrix
5073 
5074   Output Parameter:
5075 . A_loc - the local sequential matrix generated
5076 
5077   Level: developer
5078 
5079   Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns, where `mlocal` is the local row count obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`.
5083 
  In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5085 
5086   For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
5087 
5088   Destroy the matrix with `MatDestroy()`
5089 
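  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATAIJ` (sequential or parallel) matrix created elsewhere:
.vb
  Mat A_loc;
  PetscCall(MatAIJGetLocalMat(A, &A_loc));
  // ... work with the local rows of A as a sequential matrix ...
  PetscCall(MatDestroy(&A_loc));
.ve
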
5090 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5091 @*/
5092 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5093 {
5094   PetscBool mpi;
5095 
5096   PetscFunctionBegin;
5097   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5098   if (mpi) {
5099     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5100   } else {
5101     *A_loc = A;
5102     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5103   }
5104   PetscFunctionReturn(PETSC_SUCCESS);
5105 }
5106 
5107 /*@
5108   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5109 
5110   Not Collective
5111 
5112   Input Parameters:
5113 + A     - the matrix
5114 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5115 
5116   Output Parameter:
5117 . A_loc - the local sequential matrix generated
5118 
5119   Level: developer
5120 
5121   Notes:
5122   The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5124   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5125 
  In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5127 
5128   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5129   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5130   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5131   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5132 
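  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix created elsewhere:
.vb
  Mat A_loc;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
  // ... use the local rows of A as a sequential matrix ...
  // after the numerical values (but not the nonzero pattern) of A change
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));
  PetscCall(MatDestroy(&A_loc));
.ve
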
5133 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5134 @*/
5135 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5136 {
5137   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5138   Mat_SeqAIJ        *mat, *a, *b;
5139   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5140   const PetscScalar *aa, *ba, *aav, *bav;
5141   PetscScalar       *ca, *cam;
5142   PetscMPIInt        size;
5143   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5144   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5145   PetscBool          match;
5146 
5147   PetscFunctionBegin;
5148   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5149   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5150   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5151   if (size == 1) {
5152     if (scall == MAT_INITIAL_MATRIX) {
5153       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5154       *A_loc = mpimat->A;
5155     } else if (scall == MAT_REUSE_MATRIX) {
5156       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5157     }
5158     PetscFunctionReturn(PETSC_SUCCESS);
5159   }
5160 
5161   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5162   a  = (Mat_SeqAIJ *)mpimat->A->data;
5163   b  = (Mat_SeqAIJ *)mpimat->B->data;
5164   ai = a->i;
5165   aj = a->j;
5166   bi = b->i;
5167   bj = b->j;
5168   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5169   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5170   aa = aav;
5171   ba = bav;
5172   if (scall == MAT_INITIAL_MATRIX) {
5173     PetscCall(PetscMalloc1(1 + am, &ci));
5174     ci[0] = 0;
5175     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5176     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5177     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5178     k = 0;
5179     for (i = 0; i < am; i++) {
5180       ncols_o = bi[i + 1] - bi[i];
5181       ncols_d = ai[i + 1] - ai[i];
5182       /* off-diagonal portion of A */
5183       for (jo = 0; jo < ncols_o; jo++) {
5184         col = cmap[*bj];
5185         if (col >= cstart) break;
5186         cj[k] = col;
5187         bj++;
5188         ca[k++] = *ba++;
5189       }
5190       /* diagonal portion of A */
5191       for (j = 0; j < ncols_d; j++) {
5192         cj[k]   = cstart + *aj++;
5193         ca[k++] = *aa++;
5194       }
5195       /* off-diagonal portion of A */
5196       for (j = jo; j < ncols_o; j++) {
5197         cj[k]   = cmap[*bj++];
5198         ca[k++] = *ba++;
5199       }
5200     }
5201     /* put together the new matrix */
5202     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5203     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5204     /* Since these are PETSc arrays, change flags to free them as necessary. */
5205     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5206     mat->free_a  = PETSC_TRUE;
5207     mat->free_ij = PETSC_TRUE;
5208     mat->nonew   = 0;
5209   } else if (scall == MAT_REUSE_MATRIX) {
5210     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5211     ci  = mat->i;
5212     cj  = mat->j;
5213     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5214     for (i = 0; i < am; i++) {
5215       /* off-diagonal portion of A */
5216       ncols_o = bi[i + 1] - bi[i];
5217       for (jo = 0; jo < ncols_o; jo++) {
5218         col = cmap[*bj];
5219         if (col >= cstart) break;
5220         *cam++ = *ba++;
5221         bj++;
5222       }
5223       /* diagonal portion of A */
5224       ncols_d = ai[i + 1] - ai[i];
5225       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5226       /* off-diagonal portion of A */
5227       for (j = jo; j < ncols_o; j++) {
5228         *cam++ = *ba++;
5229         bj++;
5230       }
5231     }
5232     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5233   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5234   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5235   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5236   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5237   PetscFunctionReturn(PETSC_SUCCESS);
5238 }
5239 
5240 /*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  `mlocal` rows and `n` columns, where `n` is the sum of the number of columns of the diagonal and off-diagonal parts
5243 
5244   Not Collective
5245 
5246   Input Parameters:
5247 + A     - the matrix
5248 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5249 
5250   Output Parameters:
5251 + glob  - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5252 - A_loc - the local sequential matrix generated
5253 
5254   Level: developer
5255 
5256   Note:
  This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
  part, followed by those associated with the off-diagonal part (in its local ordering).
5259 
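  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix created elsewhere; `glob` maps the columns of `A_loc` (diagonal part first, then off-diagonal part) back to global column indices:
.vb
  Mat A_loc;
  IS  glob;
  PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &A_loc));
  // ... work with A_loc, translating its column indices through glob when needed ...
  PetscCall(ISDestroy(&glob));
  PetscCall(MatDestroy(&A_loc));
.ve
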
5260 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5261 @*/
5262 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5263 {
5264   Mat             Ao, Ad;
5265   const PetscInt *cmap;
5266   PetscMPIInt     size;
5267   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5268 
5269   PetscFunctionBegin;
5270   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5271   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5272   if (size == 1) {
5273     if (scall == MAT_INITIAL_MATRIX) {
5274       PetscCall(PetscObjectReference((PetscObject)Ad));
5275       *A_loc = Ad;
5276     } else if (scall == MAT_REUSE_MATRIX) {
5277       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5278     }
5279     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5280     PetscFunctionReturn(PETSC_SUCCESS);
5281   }
5282   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5283   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5284   if (f) {
5285     PetscCall((*f)(A, scall, glob, A_loc));
5286   } else {
5287     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5288     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5289     Mat_SeqAIJ        *c;
5290     PetscInt          *ai = a->i, *aj = a->j;
5291     PetscInt          *bi = b->i, *bj = b->j;
5292     PetscInt          *ci, *cj;
5293     const PetscScalar *aa, *ba;
5294     PetscScalar       *ca;
5295     PetscInt           i, j, am, dn, on;
5296 
5297     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5298     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5299     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5300     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5301     if (scall == MAT_INITIAL_MATRIX) {
5302       PetscInt k;
5303       PetscCall(PetscMalloc1(1 + am, &ci));
5304       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5305       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5306       ci[0] = 0;
5307       for (i = 0, k = 0; i < am; i++) {
5308         const PetscInt ncols_o = bi[i + 1] - bi[i];
5309         const PetscInt ncols_d = ai[i + 1] - ai[i];
5310         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5311         /* diagonal portion of A */
5312         for (j = 0; j < ncols_d; j++, k++) {
5313           cj[k] = *aj++;
5314           ca[k] = *aa++;
5315         }
5316         /* off-diagonal portion of A */
5317         for (j = 0; j < ncols_o; j++, k++) {
5318           cj[k] = dn + *bj++;
5319           ca[k] = *ba++;
5320         }
5321       }
5322       /* put together the new matrix */
5323       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5324       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5325       /* Since these are PETSc arrays, change flags to free them as necessary. */
5326       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5327       c->free_a  = PETSC_TRUE;
5328       c->free_ij = PETSC_TRUE;
5329       c->nonew   = 0;
5330       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5331     } else if (scall == MAT_REUSE_MATRIX) {
5332       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5333       for (i = 0; i < am; i++) {
5334         const PetscInt ncols_d = ai[i + 1] - ai[i];
5335         const PetscInt ncols_o = bi[i + 1] - bi[i];
5336         /* diagonal portion of A */
5337         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5338         /* off-diagonal portion of A */
5339         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5340       }
5341       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5342     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5343     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
    PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5345     if (glob) {
5346       PetscInt cst, *gidx;
5347 
5348       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5349       PetscCall(PetscMalloc1(dn + on, &gidx));
5350       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5351       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5352       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5353     }
5354   }
5355   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5356   PetscFunctionReturn(PETSC_SUCCESS);
5357 }
5358 
5359 /*@C
  MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5361 
5362   Not Collective
5363 
5364   Input Parameters:
5365 + A     - the matrix
5366 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5367 . row   - index set of rows to extract (or `NULL`)
5368 - col   - index set of columns to extract (or `NULL`)
5369 
5370   Output Parameter:
5371 . A_loc - the local sequential matrix generated
5372 
5373   Level: developer
5374 
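  Example Usage:
  A minimal sketch, assuming `A` is an assembled `MATMPIAIJ` matrix created elsewhere; passing `NULL` for `row` and `col` selects all local rows and the nonzero columns:
.vb
  Mat A_loc;
  PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &A_loc));
  // ... use A_loc ...
  PetscCall(MatDestroy(&A_loc));
.ve
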
5375 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5376 @*/
5377 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5378 {
5379   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5380   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5381   IS          isrowa, iscola;
5382   Mat        *aloc;
5383   PetscBool   match;
5384 
5385   PetscFunctionBegin;
5386   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5387   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5388   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5389   if (!row) {
5390     start = A->rmap->rstart;
5391     end   = A->rmap->rend;
5392     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5393   } else {
5394     isrowa = *row;
5395   }
5396   if (!col) {
5397     start = A->cmap->rstart;
5398     cmap  = a->garray;
5399     nzA   = a->A->cmap->n;
5400     nzB   = a->B->cmap->n;
5401     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5402     ncols = 0;
5403     for (i = 0; i < nzB; i++) {
5404       if (cmap[i] < start) idx[ncols++] = cmap[i];
5405       else break;
5406     }
5407     imark = i;
5408     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5409     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5410     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5411   } else {
5412     iscola = *col;
5413   }
5414   if (scall != MAT_INITIAL_MATRIX) {
5415     PetscCall(PetscMalloc1(1, &aloc));
5416     aloc[0] = *A_loc;
5417   }
5418   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5419   if (!col) { /* attach global id of condensed columns */
5420     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5421   }
5422   *A_loc = aloc[0];
5423   PetscCall(PetscFree(aloc));
5424   if (!row) PetscCall(ISDestroy(&isrowa));
5425   if (!col) PetscCall(ISDestroy(&iscola));
5426   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5427   PetscFunctionReturn(PETSC_SUCCESS);
5428 }
5429 
5430 /*
 * Create a sequential AIJ matrix based on row indices; once a row is matched, all of its columns are extracted.
 * Rows can be local or remote. The routine is designed to be memory scalable, so nothing is based
 * on a global size.
5434  * */
5435 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5436 {
5437   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5438   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
5439   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5440   PetscMPIInt            owner;
5441   PetscSFNode           *iremote, *oiremote;
5442   const PetscInt        *lrowindices;
5443   PetscSF                sf, osf;
5444   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5445   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5446   MPI_Comm               comm;
5447   ISLocalToGlobalMapping mapping;
5448   const PetscScalar     *pd_a, *po_a;
5449 
5450   PetscFunctionBegin;
5451   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5452   /* plocalsize is the number of roots
5453    * nrows is the number of leaves
5454    * */
5455   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5456   PetscCall(ISGetLocalSize(rows, &nrows));
5457   PetscCall(PetscCalloc1(nrows, &iremote));
5458   PetscCall(ISGetIndices(rows, &lrowindices));
5459   for (i = 0; i < nrows; i++) {
5460     /* Find a remote index and an owner for a row
5461      * The row could be local or remote
5462      * */
5463     owner = 0;
5464     lidx  = 0;
5465     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5466     iremote[i].index = lidx;
5467     iremote[i].rank  = owner;
5468   }
5469   /* Create SF to communicate how many nonzero columns for each row */
5470   PetscCall(PetscSFCreate(comm, &sf));
5471   /* SF will figure out the number of nonzero columns for each row, and their
5472    * offsets
5473    * */
5474   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5475   PetscCall(PetscSFSetFromOptions(sf));
5476   PetscCall(PetscSFSetUp(sf));
5477 
5478   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5479   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5480   PetscCall(PetscCalloc1(nrows, &pnnz));
5481   roffsets[0] = 0;
5482   roffsets[1] = 0;
5483   for (i = 0; i < plocalsize; i++) {
5484     /* diagonal */
5485     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5486     /* off-diagonal */
5487     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row */
5489     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5490     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5491   }
5492   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5493   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5494   /* 'r' means root, and 'l' means leaf */
5495   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5496   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5497   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5498   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5499   PetscCall(PetscSFDestroy(&sf));
5500   PetscCall(PetscFree(roffsets));
5501   PetscCall(PetscFree(nrcols));
5502   dntotalcols = 0;
5503   ontotalcols = 0;
5504   ncol        = 0;
5505   for (i = 0; i < nrows; i++) {
5506     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5507     ncol    = PetscMax(pnnz[i], ncol);
5508     /* diagonal */
5509     dntotalcols += nlcols[i * 2 + 0];
5510     /* off-diagonal */
5511     ontotalcols += nlcols[i * 2 + 1];
5512   }
  /* We do not need to figure out the exact number of columns
   * since all the calculations are done by going through the raw data
   * */
5516   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5517   PetscCall(MatSetUp(*P_oth));
5518   PetscCall(PetscFree(pnnz));
5519   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5520   /* diagonal */
5521   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5522   /* off-diagonal */
5523   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5524   /* diagonal */
5525   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5526   /* off-diagonal */
5527   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5528   dntotalcols = 0;
5529   ontotalcols = 0;
5530   ntotalcols  = 0;
5531   for (i = 0; i < nrows; i++) {
5532     owner = 0;
5533     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5534     /* Set iremote for diag matrix */
5535     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5536       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5537       iremote[dntotalcols].rank  = owner;
      /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5539       ilocal[dntotalcols++] = ntotalcols++;
5540     }
5541     /* off-diagonal */
5542     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5543       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5544       oiremote[ontotalcols].rank  = owner;
5545       oilocal[ontotalcols++]      = ntotalcols++;
5546     }
5547   }
5548   PetscCall(ISRestoreIndices(rows, &lrowindices));
5549   PetscCall(PetscFree(loffsets));
5550   PetscCall(PetscFree(nlcols));
5551   PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth serves as leaves
5553    * Diag matrix
5554    * */
5555   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5556   PetscCall(PetscSFSetFromOptions(sf));
5557   PetscCall(PetscSFSetUp(sf));
5558 
5559   PetscCall(PetscSFCreate(comm, &osf));
5560   /* off-diagonal */
5561   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5562   PetscCall(PetscSFSetFromOptions(osf));
5563   PetscCall(PetscSFSetUp(osf));
5564   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5565   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5566   /* operate on the matrix internal data to save memory */
5567   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5568   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5569   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5570   /* Convert to global indices for diag matrix */
5571   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5572   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
5574   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5575   /* Use memory scalable approach */
5576   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5577   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5578   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5579   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5580   /* Convert back to local indices */
5581   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5582   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5583   nout = 0;
5584   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5585   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5586   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5587   /* Exchange values */
5588   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5589   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5590   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5591   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5592   /* Stop PETSc from shrinking memory */
5593   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5594   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5595   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5596   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5597   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5598   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5599   PetscCall(PetscSFDestroy(&sf));
5600   PetscCall(PetscSFDestroy(&osf));
5601   PetscFunctionReturn(PETSC_SUCCESS);
5602 }
5603 
5604 /*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A
5606  * This supports MPIAIJ and MAIJ
5607  * */
5608 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5609 {
5610   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5611   Mat_SeqAIJ *p_oth;
5612   IS          rows, map;
5613   PetscHMapI  hamp;
5614   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5615   MPI_Comm    comm;
5616   PetscSF     sf, osf;
5617   PetscBool   has;
5618 
5619   PetscFunctionBegin;
5620   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5621   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5622   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5623    *  and then create a submatrix (that often is an overlapping matrix)
5624    * */
5625   if (reuse == MAT_INITIAL_MATRIX) {
5626     /* Use a hash table to figure out unique keys */
5627     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5628     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5629     count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
5631     for (i = 0; i < a->B->cmap->n; i++) {
5632       key = a->garray[i] / dof;
5633       PetscCall(PetscHMapIHas(hamp, key, &has));
5634       if (!has) {
5635         mapping[i] = count;
5636         PetscCall(PetscHMapISet(hamp, key, count++));
5637       } else {
        /* Current 'i' has the same key as the previous one */
5639         mapping[i] = count - 1;
5640       }
5641     }
5642     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5643     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5644     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5645     PetscCall(PetscCalloc1(htsize, &rowindices));
5646     off = 0;
5647     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5648     PetscCall(PetscHMapIDestroy(&hamp));
5649     PetscCall(PetscSortInt(htsize, rowindices));
5650     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but the user wants to recreate it */
5652     PetscCall(MatDestroy(P_oth));
5653     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5654     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5655     PetscCall(ISDestroy(&map));
5656     PetscCall(ISDestroy(&rows));
5657   } else if (reuse == MAT_REUSE_MATRIX) {
5658     /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
5660      */
5661     const PetscScalar *pd_a, *po_a;
5662 
5663     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5664     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5665     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5666     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5667     /* Update values in place */
5668     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5669     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5670     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5671     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5672     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5673     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5674     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5675     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5676   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5677   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5678   PetscFunctionReturn(PETSC_SUCCESS);
5679 }
5680 
5681 /*@C
  MatGetBrowsOfAcols - Returns an `IS` that contains the rows of `B` that equal the nonzero columns of local `A`
5683 
5684   Collective
5685 
5686   Input Parameters:
5687 + A     - the first matrix in `MATMPIAIJ` format
5688 . B     - the second matrix in `MATMPIAIJ` format
5689 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5690 
5691   Output Parameters:
5692 + rowb  - On input index sets of rows of B to extract (or `NULL`), modified on output
5693 . colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
5694 - B_seq - the sequential matrix generated
5695 
5696   Level: developer
5697 
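  Example Usage:
  A minimal sketch, assuming `A` and `B` are assembled `MATMPIAIJ` matrices created elsewhere with compatible layouts; the index sets created by the first call are reused by the second and are owned by the caller:
.vb
  Mat B_seq;
  IS  rowb = NULL, colb = NULL;
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
  // ... after the numerical values of B change ...
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve
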
5698 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5699 @*/
5700 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5701 {
5702   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5703   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5704   IS          isrowb, iscolb;
5705   Mat        *bseq = NULL;
5706 
5707   PetscFunctionBegin;
5708   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5709              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5710   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5711 
5712   if (scall == MAT_INITIAL_MATRIX) {
5713     start = A->cmap->rstart;
5714     cmap  = a->garray;
5715     nzA   = a->A->cmap->n;
5716     nzB   = a->B->cmap->n;
5717     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5718     ncols = 0;
5719     for (i = 0; i < nzB; i++) { /* row < local row index */
5720       if (cmap[i] < start) idx[ncols++] = cmap[i];
5721       else break;
5722     }
5723     imark = i;
5724     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5725     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5726     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5727     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5728   } else {
5729     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5730     isrowb = *rowb;
5731     iscolb = *colb;
5732     PetscCall(PetscMalloc1(1, &bseq));
5733     bseq[0] = *B_seq;
5734   }
5735   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5736   *B_seq = bseq[0];
5737   PetscCall(PetscFree(bseq));
5738   if (!rowb) {
5739     PetscCall(ISDestroy(&isrowb));
5740   } else {
5741     *rowb = isrowb;
5742   }
5743   if (!colb) {
5744     PetscCall(ISDestroy(&iscolb));
5745   } else {
5746     *colb = iscolb;
5747   }
5748   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5749   PetscFunctionReturn(PETSC_SUCCESS);
5750 }
5751 
5752 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A
5755 
5756     Collective
5757 
5758    Input Parameters:
5759 +    A,B - the matrices in `MATMPIAIJ` format
5760 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5761 
   Output Parameters:
5763 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5764 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5765 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5766 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5767 
5768     Developer Note:
    This directly accesses information inside the VecScatter associated with the matrix-vector product
    for this matrix. This is not desirable.
5771 
5772     Level: developer
5773 
5774 */
5775 
5776 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5777 {
5778   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5779   VecScatter         ctx;
5780   MPI_Comm           comm;
5781   const PetscMPIInt *rprocs, *sprocs;
5782   PetscMPIInt        nrecvs, nsends;
5783   const PetscInt    *srow, *rstarts, *sstarts;
5784   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5785   PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
5786   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5787   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5788   PetscMPIInt        size, tag, rank, nreqs;
5789 
5790   PetscFunctionBegin;
5791   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5792   PetscCallMPI(MPI_Comm_size(comm, &size));
5793 
5794   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5795              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5796   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5797   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5798 
5799   if (size == 1) {
5800     startsj_s = NULL;
5801     bufa_ptr  = NULL;
5802     *B_oth    = NULL;
5803     PetscFunctionReturn(PETSC_SUCCESS);
5804   }
5805 
5806   ctx = a->Mvctx;
5807   tag = ((PetscObject)ctx)->tag;
5808 
5809   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5810   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5811   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5812   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5813   PetscCall(PetscMalloc1(nreqs, &reqs));
5814   rwaits = reqs;
5815   swaits = PetscSafePointerPlusOffset(reqs, nrecvs);
5816 
5817   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5818   if (scall == MAT_INITIAL_MATRIX) {
5819     /* i-array */
5820     /*  post receives */
5821     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5822     for (i = 0; i < nrecvs; i++) {
5823       rowlen = rvalues + rstarts[i] * rbs;
5824       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5825       PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5826     }
5827 
5828     /* pack the outgoing message */
5829     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5830 
5831     sstartsj[0] = 0;
5832     rstartsj[0] = 0;
5833     len         = 0; /* total length of j or a array to be sent */
5834     if (nsends) {
5835       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5836       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5837     }
5838     for (i = 0; i < nsends; i++) {
5839       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5840       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5841       for (j = 0; j < nrows; j++) {
5842         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5843         for (l = 0; l < sbs; l++) {
5844           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5845 
5846           rowlen[j * sbs + l] = ncols;
5847 
5848           len += ncols;
5849           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5850         }
5851         k++;
5852       }
5853       PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5854 
5855       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5856     }
5857     /* recvs and sends of i-array are completed */
5858     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5859     PetscCall(PetscFree(svalues));
5860 
5861     /* allocate buffers for sending j and a arrays */
5862     PetscCall(PetscMalloc1(len + 1, &bufj));
5863     PetscCall(PetscMalloc1(len + 1, &bufa));
5864 
5865     /* create i-array of B_oth */
5866     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5867 
5868     b_othi[0] = 0;
5869     len       = 0; /* total length of j or a array to be received */
5870     k         = 0;
5871     for (i = 0; i < nrecvs; i++) {
5872       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5873       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5874       for (j = 0; j < nrows; j++) {
5875         b_othi[k + 1] = b_othi[k] + rowlen[j];
5876         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5877         k++;
5878       }
5879       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5880     }
5881     PetscCall(PetscFree(rvalues));
5882 
5883     /* allocate space for j and a arrays of B_oth */
5884     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5885     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5886 
5887     /* j-array */
5888     /*  post receives of j-array */
5889     for (i = 0; i < nrecvs; i++) {
5890       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5891       PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5892     }
5893 
5894     /* pack the outgoing message j-array */
5895     if (nsends) k = sstarts[0];
5896     for (i = 0; i < nsends; i++) {
5897       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5898       bufJ  = bufj + sstartsj[i];
5899       for (j = 0; j < nrows; j++) {
5900         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5901         for (ll = 0; ll < sbs; ll++) {
5902           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5903           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5904           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5905         }
5906       }
5907       PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5908     }
5909 
5910     /* recvs and sends of j-array are completed */
5911     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5912   } else if (scall == MAT_REUSE_MATRIX) {
5913     sstartsj = *startsj_s;
5914     rstartsj = *startsj_r;
5915     bufa     = *bufa_ptr;
5916     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5918 
5919   /* a-array */
5920   /*  post receives of a-array */
5921   for (i = 0; i < nrecvs; i++) {
5922     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5923     PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5924   }
5925 
5926   /* pack the outgoing message a-array */
5927   if (nsends) k = sstarts[0];
5928   for (i = 0; i < nsends; i++) {
5929     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5930     bufA  = bufa + sstartsj[i];
5931     for (j = 0; j < nrows; j++) {
5932       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5933       for (ll = 0; ll < sbs; ll++) {
5934         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5935         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
5936         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
5937       }
5938     }
5939     PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
5940   }
5941   /* recvs and sends of a-array are completed */
5942   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5943   PetscCall(PetscFree(reqs));
5944 
5945   if (scall == MAT_INITIAL_MATRIX) {
5946     Mat_SeqAIJ *b_oth;
5947 
5948     /* put together the new matrix */
5949     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
5950 
5951     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5952     /* Since these are PETSc arrays, change flags to free them as necessary. */
5953     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
5954     b_oth->free_a  = PETSC_TRUE;
5955     b_oth->free_ij = PETSC_TRUE;
5956     b_oth->nonew   = 0;
5957 
5958     PetscCall(PetscFree(bufj));
5959     if (!startsj_s || !bufa_ptr) {
5960       PetscCall(PetscFree2(sstartsj, rstartsj));
5961       PetscCall(PetscFree(bufa_ptr));
5962     } else {
5963       *startsj_s = sstartsj;
5964       *startsj_r = rstartsj;
5965       *bufa_ptr  = bufa;
5966     }
5967   } else if (scall == MAT_REUSE_MATRIX) {
5968     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
5969   }
5970 
5971   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
5972   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
5973   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
5974   PetscFunctionReturn(PETSC_SUCCESS);
5975 }
5976 
5977 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
5979 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
5980 #if defined(PETSC_HAVE_MKL_SPARSE)
5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
5982 #endif
5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
5985 #if defined(PETSC_HAVE_ELEMENTAL)
5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
5987 #endif
5988 #if defined(PETSC_HAVE_SCALAPACK)
5989 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
5990 #endif
5991 #if defined(PETSC_HAVE_HYPRE)
5992 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
5993 #endif
5994 #if defined(PETSC_HAVE_CUDA)
5995 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
5996 #endif
5997 #if defined(PETSC_HAVE_HIP)
5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
5999 #endif
6000 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6001 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6002 #endif
6003 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6004 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6005 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6006 
6007 /*
6008     Computes (B'*A')' since computing B*A directly is untenable
6009 
6010                n                       p                          p
6011         [             ]       [             ]         [                 ]
6012       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6013         [             ]       [             ]         [                 ]
6014 
6015 */
6016 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6017 {
6018   Mat At, Bt, Ct;
6019 
6020   PetscFunctionBegin;
6021   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6022   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6023   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
6024   PetscCall(MatDestroy(&At));
6025   PetscCall(MatDestroy(&Bt));
6026   PetscCall(MatTransposeSetPrecursor(Ct, C));
6027   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6028   PetscCall(MatDestroy(&Ct));
6029   PetscFunctionReturn(PETSC_SUCCESS);
6030 }
6031 
6032 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6033 {
6034   PetscBool cisdense;
6035 
6036   PetscFunctionBegin;
6037   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6038   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6039   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6040   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6041   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6042   PetscCall(MatSetUp(C));
6043 
6044   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6045   PetscFunctionReturn(PETSC_SUCCESS);
6046 }
6047 
6048 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6049 {
6050   Mat_Product *product = C->product;
6051   Mat          A = product->A, B = product->B;
6052 
6053   PetscFunctionBegin;
6054   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6055              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6056   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6057   C->ops->productsymbolic = MatProductSymbolic_AB;
6058   PetscFunctionReturn(PETSC_SUCCESS);
6059 }
6060 
6061 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6062 {
6063   Mat_Product *product = C->product;
6064 
6065   PetscFunctionBegin;
6066   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6067   PetscFunctionReturn(PETSC_SUCCESS);
6068 }
6069 
6070 /*
6071    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6072 
6073   Input Parameters:
6074 
6075     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
6076     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)
6077 
6078     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6079 
6080     For Set1, j1[] contains column indices of the nonzeros.
6081     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6082     respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted,
6083     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6084 
6085     Similar for Set2.
6086 
6087     This routine merges the two sets of nonzeros row by row and removes repeats.
6088 
6089   Output Parameters: (memory is allocated by the caller)
6090 
6091     i[],j[]: the CSR of the merged matrix, which has m rows.
6092     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6093     imap2[]: similar to imap1[], but for Set2.
6094     Note we order nonzeros row-by-row and from left to right.
6095 */
6096 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6097 {
6098   PetscInt   r, m; /* Row index of mat */
6099   PetscCount t, t1, t2, b1, e1, b2, e2;
6100 
6101   PetscFunctionBegin;
6102   PetscCall(MatGetLocalSize(mat, &m, NULL));
6103   t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */
6104   i[0]        = 0;
6105   for (r = 0; r < m; r++) { /* Do row by row merging */
6106     b1 = rowBegin1[r];
6107     e1 = rowEnd1[r];
6108     b2 = rowBegin2[r];
6109     e2 = rowEnd2[r];
6110     while (b1 < e1 && b2 < e2) {
6111       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6112         j[t]      = j1[b1];
6113         imap1[t1] = t;
6114         imap2[t2] = t;
6115         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
6116         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
6117         t1++;
6118         t2++;
6119         t++;
6120       } else if (j1[b1] < j2[b2]) {
6121         j[t]      = j1[b1];
6122         imap1[t1] = t;
6123         b1 += jmap1[t1 + 1] - jmap1[t1];
6124         t1++;
6125         t++;
6126       } else {
6127         j[t]      = j2[b2];
6128         imap2[t2] = t;
6129         b2 += jmap2[t2 + 1] - jmap2[t2];
6130         t2++;
6131         t++;
6132       }
6133     }
6134     /* Merge the remaining in either j1[] or j2[] */
6135     while (b1 < e1) {
6136       j[t]      = j1[b1];
6137       imap1[t1] = t;
6138       b1 += jmap1[t1 + 1] - jmap1[t1];
6139       t1++;
6140       t++;
6141     }
6142     while (b2 < e2) {
6143       j[t]      = j2[b2];
6144       imap2[t2] = t;
6145       b2 += jmap2[t2 + 1] - jmap2[t2];
6146       t2++;
6147       t++;
6148     }
6149     PetscCall(PetscIntCast(t, i + r + 1));
6150   }
6151   PetscFunctionReturn(PETSC_SUCCESS);
6152 }
6153 
6154 /*
6155   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6156 
6157   Input Parameters:
6158     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6159     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6160       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6161 
6162       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6163       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6164 
6165   Output Parameters:
6166     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6167     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6168       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6169       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6170 
6171     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6172       Atot: number of entries belonging to the diagonal block.
6173       Annz: number of unique nonzeros belonging to the diagonal block.
6174       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6175         repeats (i.e., same 'i,j' pair).
6176       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6177         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6178 
6182     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6183 
6184     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6185 */
6186 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6187 {
6188   PetscInt    cstart, cend, rstart, rend, row, col;
6189   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6190   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6191   PetscCount  k, m, p, q, r, s, mid;
6192   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6193 
6194   PetscFunctionBegin;
6195   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6196   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6197   m = rend - rstart;
6198 
6199   /* Skip negative rows */
6200   for (k = 0; k < n; k++)
6201     if (i[k] >= 0) break;
6202 
6203   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6204      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6205   */
6206   while (k < n) {
6207     row = i[k];
6208     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6209     for (s = k; s < n; s++)
6210       if (i[s] != row) break;
6211 
6212     /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
6213     for (p = k; p < s; p++) {
6214       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
6215     }
6216     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6217     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6218     rowBegin[row - rstart] = k;
6219     rowMid[row - rstart]   = mid;
6220     rowEnd[row - rstart]   = s;
6221     PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N);
6222 
6223     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6224     Atot += mid - k;
6225     Btot += s - mid;
6226 
6227     /* Count unique nonzeros of this diag row */
6228     for (p = k; p < mid;) {
6229       col = j[p];
6230       do {
6231         j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
6232         p++;
6233       } while (p < mid && j[p] == col);
6234       Annz++;
6235     }
6236 
6237     /* Count unique nonzeros of this offdiag row */
6238     for (p = mid; p < s;) {
6239       col = j[p];
6240       do {
6241         p++;
6242       } while (p < s && j[p] == col);
6243       Bnnz++;
6244     }
6245     k = s;
6246   }
6247 
6248   /* Allocation according to Atot, Btot, Annz, Bnnz */
6249   PetscCall(PetscMalloc1(Atot, &Aperm));
6250   PetscCall(PetscMalloc1(Btot, &Bperm));
6251   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6252   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6253 
6254   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6255   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6256   for (r = 0; r < m; r++) {
6257     k   = rowBegin[r];
6258     mid = rowMid[r];
6259     s   = rowEnd[r];
6260     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
6261     PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
6262     Atot += mid - k;
6263     Btot += s - mid;
6264 
6265     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6266     for (p = k; p < mid;) {
6267       col = j[p];
6268       q   = p;
6269       do {
6270         p++;
6271       } while (p < mid && j[p] == col);
6272       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6273       Annz++;
6274     }
6275 
6276     for (p = mid; p < s;) {
6277       col = j[p];
6278       q   = p;
6279       do {
6280         p++;
6281       } while (p < s && j[p] == col);
6282       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6283       Bnnz++;
6284     }
6285   }
6286   /* Output */
6287   *Aperm_ = Aperm;
6288   *Annz_  = Annz;
6289   *Atot_  = Atot;
6290   *Ajmap_ = Ajmap;
6291   *Bperm_ = Bperm;
6292   *Bnnz_  = Bnnz;
6293   *Btot_  = Btot;
6294   *Bjmap_ = Bjmap;
6295   PetscFunctionReturn(PETSC_SUCCESS);
6296 }
6297 
6298 /*
6299   Expand the jmap[] array into a new one indexed by the nonzeros of the merged matrix
6300 
6301   Input Parameters:
6302     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6303     nnz:  number of unique nonzeros in the merged matrix
6304     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6305     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6306 
6307   Output Parameter: (memory is allocated by the caller)
6308     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6309 
6310   Example:
6311     nnz1 = 4
6312     nnz  = 6
6313     imap = [1,3,4,5]
6314     jmap = [0,3,5,6,7]
6315    then,
6316     jmap_new = [0,0,3,3,5,6,7]
6317 */
6318 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6319 {
6320   PetscCount k, p;
6321 
6322   PetscFunctionBegin;
6323   jmap_new[0] = 0;
6324   p           = nnz;                /* p loops over jmap_new[] backwards */
6325   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6326     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6327   }
6328   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6329   PetscFunctionReturn(PETSC_SUCCESS);
6330 }
6331 
6332 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
6333 {
6334   MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;
6335 
6336   PetscFunctionBegin;
6337   PetscCall(PetscSFDestroy(&coo->sf));
6338   PetscCall(PetscFree(coo->Aperm1));
6339   PetscCall(PetscFree(coo->Bperm1));
6340   PetscCall(PetscFree(coo->Ajmap1));
6341   PetscCall(PetscFree(coo->Bjmap1));
6342   PetscCall(PetscFree(coo->Aimap2));
6343   PetscCall(PetscFree(coo->Bimap2));
6344   PetscCall(PetscFree(coo->Aperm2));
6345   PetscCall(PetscFree(coo->Bperm2));
6346   PetscCall(PetscFree(coo->Ajmap2));
6347   PetscCall(PetscFree(coo->Bjmap2));
6348   PetscCall(PetscFree(coo->Cperm1));
6349   PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
6350   PetscCall(PetscFree(coo));
6351   PetscFunctionReturn(PETSC_SUCCESS);
6352 }
6353 
6354 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6355 {
6356   MPI_Comm             comm;
6357   PetscMPIInt          rank, size;
6358   PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6359   PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
6360   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6361   PetscContainer       container;
6362   MatCOOStruct_MPIAIJ *coo;
6363 
6364   PetscFunctionBegin;
6365   PetscCall(PetscFree(mpiaij->garray));
6366   PetscCall(VecDestroy(&mpiaij->lvec));
6367 #if defined(PETSC_USE_CTABLE)
6368   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6369 #else
6370   PetscCall(PetscFree(mpiaij->colmap));
6371 #endif
6372   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6373   mat->assembled     = PETSC_FALSE;
6374   mat->was_assembled = PETSC_FALSE;
6375 
6376   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6377   PetscCallMPI(MPI_Comm_size(comm, &size));
6378   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6379   PetscCall(PetscLayoutSetUp(mat->rmap));
6380   PetscCall(PetscLayoutSetUp(mat->cmap));
6381   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6382   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6383   PetscCall(MatGetLocalSize(mat, &m, &n));
6384   PetscCall(MatGetSize(mat, &M, &N));
6385 
6386   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6387   /* entries come first, then local rows, then remote rows.                     */
6388   PetscCount n1 = coo_n, *perm1;
6389   PetscInt  *i1 = coo_i, *j1 = coo_j;
6390 
6391   PetscCall(PetscMalloc1(n1, &perm1));
6392   for (k = 0; k < n1; k++) perm1[k] = k;
6393 
6394   /* Manipulate indices so that entries with negative row or col indices will have smallest
6395      row indices, local entries will have greater but negative row indices, and remote entries
6396      will have positive row indices.
6397   */
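  /* For instance (illustrative values): with rstart = 10 and rend = 20, an ignored entry gets row index PETSC_INT_MIN,
     a local row 12 becomes 12 - PETSC_INT_MAX (negative but greater than PETSC_INT_MIN), and a remote row 25 keeps 25,
     so sorting by row index orders the entries as ignored, then local, then remote */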
6398   for (k = 0; k < n1; k++) {
6399     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
6400     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
6401     else {
6402       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but the input contains entries in remote rows");
6403       if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
6404     }
6405   }
6406 
6407   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6408   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6409 
6410   /* Advance k to the first entry we need to take care of */
6411   for (k = 0; k < n1; k++)
6412     if (i1[k] > PETSC_INT_MIN) break;
6413   PetscCount i1start = k;
6414 
6415   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
6416   for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/
6417 
6418   PetscCheck(n1 == 0 || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M);
6419 
6420   /*           Send remote rows to their owner                                  */
6421   /* Find which rows should be sent to which remote ranks*/
6422   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6423   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6424   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6425   const PetscInt *ranges;
6426   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6427 
6428   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6429   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6430   for (k = rem; k < n1;) {
6431     PetscMPIInt owner;
6432     PetscInt    firstRow, lastRow;
6433 
6434     /* Locate a row range */
6435     firstRow = i1[k]; /* first row of this owner */
6436     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6437     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6438 
6439     /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */
6440     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6441 
6442     /* All entries in [k,p) belong to this remote owner */
6443     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6444       PetscMPIInt *sendto2;
6445       PetscInt    *nentries2;
6446       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6447 
6448       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6449       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6450       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6451       PetscCall(PetscFree2(sendto, nentries));
6452       sendto   = sendto2;
6453       nentries = nentries2;
6454       maxNsend = maxNsend2;
6455     }
6456     sendto[nsend] = owner;
6457     PetscCall(PetscIntCast(p - k, &nentries[nsend]));
6458     nsend++;
6459     k = p;
6460   }
6461 
6462   /* Build 1st SF to know offsets on remote to send data */
6463   PetscSF      sf1;
6464   PetscInt     nroots = 1, nroots2 = 0;
6465   PetscInt     nleaves = nsend, nleaves2 = 0;
6466   PetscInt    *offsets;
6467   PetscSFNode *iremote;
6468 
6469   PetscCall(PetscSFCreate(comm, &sf1));
6470   PetscCall(PetscMalloc1(nsend, &iremote));
6471   PetscCall(PetscMalloc1(nsend, &offsets));
6472   for (k = 0; k < nsend; k++) {
6473     iremote[k].rank  = sendto[k];
6474     iremote[k].index = 0;
6475     nleaves2 += nentries[k];
6476     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6477   }
6478   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6479   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6480   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, the offsets[] check below catches it */
6481   PetscCall(PetscSFDestroy(&sf1));
6482   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem);
6483 
6484   /* Build 2nd SF to send remote COOs to their owner */
6485   PetscSF sf2;
6486   nroots  = nroots2;
6487   nleaves = nleaves2;
6488   PetscCall(PetscSFCreate(comm, &sf2));
6489   PetscCall(PetscSFSetFromOptions(sf2));
6490   PetscCall(PetscMalloc1(nleaves, &iremote));
6491   p = 0;
6492   for (k = 0; k < nsend; k++) {
6493     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6494     for (q = 0; q < nentries[k]; q++, p++) {
6495       iremote[p].rank = sendto[k];
6496       PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
6497     }
6498   }
6499   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6500 
6501   /* Send the remote COOs to their owner */
6502   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6503   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6504   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6505   PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6506   PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6507   PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
6508   PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
6509   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6510   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
6511   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6512   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));
6513 
6514   PetscCall(PetscFree(offsets));
6515   PetscCall(PetscFree2(sendto, nentries));
6516 
6517   /* Sort received COOs by row along with the permutation array     */
6518   for (k = 0; k < n2; k++) perm2[k] = k;
6519   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6520 
6521   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6522   PetscCount *Cperm1;
6523   PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
6524   PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
6525   PetscCall(PetscMalloc1(nleaves, &Cperm1));
6526   PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));
6527 
6528   /* Support for HYPRE matrices, kind of a hack.
6529      Swap min column with diagonal so that diagonal values will go first */
6530   PetscBool hypre;
6531   PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
6532   if (hypre) {
6533     PetscInt *minj;
6534     PetscBT   hasdiag;
6535 
6536     PetscCall(PetscBTCreate(m, &hasdiag));
6537     PetscCall(PetscMalloc1(m, &minj));
6538     for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
6539     for (k = i1start; k < rem; k++) {
6540       if (j1[k] < cstart || j1[k] >= cend) continue;
6541       const PetscInt rindex = i1[k] - rstart;
6542       if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6543       minj[rindex] = PetscMin(minj[rindex], j1[k]);
6544     }
6545     for (k = 0; k < n2; k++) {
6546       if (j2[k] < cstart || j2[k] >= cend) continue;
6547       const PetscInt rindex = i2[k] - rstart;
6548       if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
6549       minj[rindex] = PetscMin(minj[rindex], j2[k]);
6550     }
6551     for (k = i1start; k < rem; k++) {
6552       const PetscInt rindex = i1[k] - rstart;
6553       if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6554       if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
6555       else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
6556     }
6557     for (k = 0; k < n2; k++) {
6558       const PetscInt rindex = i2[k] - rstart;
6559       if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
6560       if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
6561       else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
6562     }
6563     PetscCall(PetscBTDestroy(&hasdiag));
6564     PetscCall(PetscFree(minj));
6565   }
6566 
6567   /* Split local COOs and received COOs into diag/offdiag portions */
6568   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6569   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
6570   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6571   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6572   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6573   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6574 
6575   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6576   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6577   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6578   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6579 
6580   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6581   PetscInt *Ai, *Bi;
6582   PetscInt *Aj, *Bj;
6583 
6584   PetscCall(PetscMalloc1(m + 1, &Ai));
6585   PetscCall(PetscMalloc1(m + 1, &Bi));
6586   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6587   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6588 
6589   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6590   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6591   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6592   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6593   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6594 
6595   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6596   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6597 
6598   /* Expand Ajmap1/Bjmap1 so that they are indexed by the nonzeros of A/B, since we */
6599   /* expect most nonzeros in A/B to have local contributing entries                 */
6600   PetscInt    Annz = Ai[m];
6601   PetscInt    Bnnz = Bi[m];
6602   PetscCount *Ajmap1_new, *Bjmap1_new;
6603 
6604   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6605   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6606 
6607   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6608   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6609 
6610   PetscCall(PetscFree(Aimap1));
6611   PetscCall(PetscFree(Ajmap1));
6612   PetscCall(PetscFree(Bimap1));
6613   PetscCall(PetscFree(Bjmap1));
6614   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6615   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6616   PetscCall(PetscFree(perm1));
6617   PetscCall(PetscFree3(i2, j2, perm2));
6618 
6619   Ajmap1 = Ajmap1_new;
6620   Bjmap1 = Bjmap1_new;
6621 
6622   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6623   if (Annz < Annz1 + Annz2) {
6624     PetscInt *Aj_new;
6625     PetscCall(PetscMalloc1(Annz, &Aj_new));
6626     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6627     PetscCall(PetscFree(Aj));
6628     Aj = Aj_new;
6629   }
6630 
6631   if (Bnnz < Bnnz1 + Bnnz2) {
6632     PetscInt *Bj_new;
6633     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6634     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6635     PetscCall(PetscFree(Bj));
6636     Bj = Bj_new;
6637   }
6638 
6639   /* Create new submatrices for on-process and off-process coupling                  */
6640   PetscScalar     *Aa, *Ba;
6641   MatType          rtype;
6642   Mat_SeqAIJ      *a, *b;
6643   PetscObjectState state;
6644   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6645   PetscCall(PetscCalloc1(Bnnz, &Ba));
6646   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6647   if (cstart) {
6648     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6649   }
6650 
6651   PetscCall(MatGetRootType_Private(mat, &rtype));
6652 
6653   MatSeqXAIJGetOptions_Private(mpiaij->A);
6654   PetscCall(MatDestroy(&mpiaij->A));
6655   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6656   PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
6657   MatSeqXAIJRestoreOptions_Private(mpiaij->A);
6658 
6659   MatSeqXAIJGetOptions_Private(mpiaij->B);
6660   PetscCall(MatDestroy(&mpiaij->B));
6661   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6662   PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
6663   MatSeqXAIJRestoreOptions_Private(mpiaij->B);
6664 
6665   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6666   mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
6667   state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
6668   PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
6669 
6670   a          = (Mat_SeqAIJ *)mpiaij->A->data;
6671   b          = (Mat_SeqAIJ *)mpiaij->B->data;
6672   a->free_a  = PETSC_TRUE;
6673   a->free_ij = PETSC_TRUE;
6674   b->free_a  = PETSC_TRUE;
6675   b->free_ij = PETSC_TRUE;
6676   a->maxnz   = a->nz;
6677   b->maxnz   = b->nz;
6678 
6679   /* conversion must happen AFTER multiply setup */
6680   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6681   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6682   PetscCall(VecDestroy(&mpiaij->lvec));
6683   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6684 
6685   // Put the COO struct in a container and then attach that to the matrix
6686   PetscCall(PetscMalloc1(1, &coo));
6687   coo->n       = coo_n;
6688   coo->sf      = sf2;
6689   coo->sendlen = nleaves;
6690   coo->recvlen = nroots;
6691   coo->Annz    = Annz;
6692   coo->Bnnz    = Bnnz;
6693   coo->Annz2   = Annz2;
6694   coo->Bnnz2   = Bnnz2;
6695   coo->Atot1   = Atot1;
6696   coo->Atot2   = Atot2;
6697   coo->Btot1   = Btot1;
6698   coo->Btot2   = Btot2;
6699   coo->Ajmap1  = Ajmap1;
6700   coo->Aperm1  = Aperm1;
6701   coo->Bjmap1  = Bjmap1;
6702   coo->Bperm1  = Bperm1;
6703   coo->Aimap2  = Aimap2;
6704   coo->Ajmap2  = Ajmap2;
6705   coo->Aperm2  = Aperm2;
6706   coo->Bimap2  = Bimap2;
6707   coo->Bjmap2  = Bjmap2;
6708   coo->Bperm2  = Bperm2;
6709   coo->Cperm1  = Cperm1;
6710   // Allocate in preallocation. If not used, it has zero cost on host
6711   PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
6712   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
6713   PetscCall(PetscContainerSetPointer(container, coo));
6714   PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
6715   PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
6716   PetscCall(PetscContainerDestroy(&container));
6717   PetscFunctionReturn(PETSC_SUCCESS);
6718 }
6719 
6720 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6721 {
6722   Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
6723   Mat                  A = mpiaij->A, B = mpiaij->B;
6724   PetscScalar         *Aa, *Ba;
6725   PetscScalar         *sendbuf, *recvbuf;
6726   const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
6727   const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
6728   const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
6729   const PetscCount    *Cperm1;
6730   PetscContainer       container;
6731   MatCOOStruct_MPIAIJ *coo;
6732 
6733   PetscFunctionBegin;
6734   PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
6735   PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "MatCOOStruct not found on this matrix");
6736   PetscCall(PetscContainerGetPointer(container, (void **)&coo));
6737   sendbuf = coo->sendbuf;
6738   recvbuf = coo->recvbuf;
6739   Ajmap1  = coo->Ajmap1;
6740   Ajmap2  = coo->Ajmap2;
6741   Aimap2  = coo->Aimap2;
6742   Bjmap1  = coo->Bjmap1;
6743   Bjmap2  = coo->Bjmap2;
6744   Bimap2  = coo->Bimap2;
6745   Aperm1  = coo->Aperm1;
6746   Aperm2  = coo->Aperm2;
6747   Bperm1  = coo->Bperm1;
6748   Bperm2  = coo->Bperm2;
6749   Cperm1  = coo->Cperm1;
6750 
6751   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6752   PetscCall(MatSeqAIJGetArray(B, &Ba));
6753 
6754   /* Pack entries to be sent to remote */
6755   for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6756 
6757   /* Send remote entries to their owner and overlap the communication with local computation */
6758   PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6759   /* Add local entries to A and B */
6760   for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6761     PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
6762     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6763     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6764   }
6765   for (PetscCount i = 0; i < coo->Bnnz; i++) {
6766     PetscScalar sum = 0.0;
6767     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6768     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6769   }
6770   PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6771 
6772   /* Add received remote entries to A and B */
6773   for (PetscCount i = 0; i < coo->Annz2; i++) {
6774     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6775   }
6776   for (PetscCount i = 0; i < coo->Bnnz2; i++) {
6777     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6778   }
6779   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6780   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6781   PetscFunctionReturn(PETSC_SUCCESS);
6782 }
6783 
6784 /*MC
6785    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6786 
6787    Options Database Keys:
6788 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6789 
6790    Level: beginner
6791 
6792    Notes:
6793    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6794     in this case the values associated with the rows and columns one passes in are set to zero
6795     in the matrix.
6796 
6797     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6798     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
6799 
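   Example Usage:
   A minimal creation sequence (a sketch only; the sizes and preallocation estimates are illustrative and error checking is omitted)
.vb
   Mat A;

   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
   MatSetType(A, MATMPIAIJ);                       // or -mat_type mpiaij together with MatSetFromOptions()
   MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL); // rough per-row estimates for the diagonal and off-diagonal blocks
   // ... insert entries with MatSetValues(), then
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve
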
6800 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6801 M*/
6802 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6803 {
6804   Mat_MPIAIJ *b;
6805   PetscMPIInt size;
6806 
6807   PetscFunctionBegin;
6808   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6809 
6810   PetscCall(PetscNew(&b));
6811   B->data       = (void *)b;
6812   B->ops[0]     = MatOps_Values;
6813   B->assembled  = PETSC_FALSE;
6814   B->insertmode = NOT_SET_VALUES;
6815   b->size       = size;
6816 
6817   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6818 
6819   /* build cache for off array entries formed */
6820   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6821 
6822   b->donotstash  = PETSC_FALSE;
6823   b->colmap      = NULL;
6824   b->garray      = NULL;
6825   b->roworiented = PETSC_TRUE;
6826 
6827   /* stuff used for matrix vector multiply */
6828   b->lvec  = NULL;
6829   b->Mvctx = NULL;
6830 
6831   /* stuff for MatGetRow() */
6832   b->rowindices   = NULL;
6833   b->rowvalues    = NULL;
6834   b->getrowactive = PETSC_FALSE;
6835 
6836   /* flexible pointer used in CUSPARSE classes */
6837   b->spptr = NULL;
6838 
6839   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6840   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6841   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6842   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6843   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6844   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6845   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
6846   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6847   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6848   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6849   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6850 #if defined(PETSC_HAVE_CUDA)
6851   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6852 #endif
6853 #if defined(PETSC_HAVE_HIP)
6854   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6855 #endif
6856 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6857   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6858 #endif
6859 #if defined(PETSC_HAVE_MKL_SPARSE)
6860   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6861 #endif
6862   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6863   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6864   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6865   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6866 #if defined(PETSC_HAVE_ELEMENTAL)
6867   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6868 #endif
6869 #if defined(PETSC_HAVE_SCALAPACK)
6870   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6871 #endif
6872   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6873   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6874 #if defined(PETSC_HAVE_HYPRE)
6875   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6876   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6877 #endif
6878   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6879   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6880   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6881   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6882   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6883   PetscFunctionReturn(PETSC_SUCCESS);
6884 }
6885 
6886 /*@
6887   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6888   and "off-diagonal" part of the matrix in CSR format.
6889 
6890   Collective
6891 
6892   Input Parameters:
6893 + comm - MPI communicator
6894 . m    - number of local rows (Cannot be `PETSC_DECIDE`)
6895 . n    - This value should be the same as the local size used in creating the
6896          x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
6897          calculated if `N` is given). For square matrices `n` is almost always `m`.
6898 . M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6899 . N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6900 . i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6901 . j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
6902 . a    - matrix values
6903 . oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6904 . oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6905 - oa   - matrix values
6906 
6907   Output Parameter:
6908 . mat - the matrix
6909 
6910   Level: advanced
6911 
6912   Notes:
6913   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
6914   must free the arrays once the matrix has been destroyed and not before.
6915 
6916   The `i` and `j` indices are 0 based
6917 
6918   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6919 
6920   This sets local rows and cannot be used to set off-processor values.
6921 
6922   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6923   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6924   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6925   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6926   keep track of the underlying arrays. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6927   communication if it is known that only local entries will be set.
6928 
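   Example Usage:
   An illustrative sketch only; it assumes each rank owns 2 rows and 2 columns and that global column 5 belongs to another rank
.vb
   PetscInt    i[]  = {0, 1, 2};  // "diagonal" CSR row pointers: one entry in row 0 and one in row 1
   PetscInt    j[]  = {0, 1};     // LOCAL column indices of the diagonal block
   PetscScalar a[]  = {2.0, 2.0};
   PetscInt    oi[] = {0, 1, 1};  // "off-diagonal" CSR row pointers: one entry in row 0, none in row 1
   PetscInt    oj[] = {5};        // GLOBAL column indices
   PetscScalar oa[] = {-1.0};
   Mat         A;

   MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A);
   // i, j, a, oi, oj, oa must stay valid until A has been destroyed
.ve
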
6929 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6930           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6931 @*/
6932 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6933 {
6934   Mat_MPIAIJ *maij;
6935 
6936   PetscFunctionBegin;
6937   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE or negative");
6938   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6939   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6940   PetscCall(MatCreate(comm, mat));
6941   PetscCall(MatSetSizes(*mat, m, n, M, N));
6942   PetscCall(MatSetType(*mat, MATMPIAIJ));
6943   maij = (Mat_MPIAIJ *)(*mat)->data;
6944 
6945   (*mat)->preallocated = PETSC_TRUE;
6946 
6947   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6948   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6949 
6950   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6951   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6952 
6953   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6954   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6955   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6956   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6957   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6958   PetscFunctionReturn(PETSC_SUCCESS);
6959 }
6960 
6961 typedef struct {
6962   Mat       *mp;    /* intermediate products */
6963   PetscBool *mptmp; /* is the intermediate product temporary? */
6964   PetscInt   cp;    /* number of intermediate products */
6965 
6966   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6967   PetscInt    *startsj_s, *startsj_r;
6968   PetscScalar *bufa;
6969   Mat          P_oth;
6970 
6971   /* may take advantage of merging product->B */
6972   Mat Bloc; /* B-local by merging diag and off-diag */
6973 
6974   /* cusparse does not support splitting the symbolic and numeric phases.
6975      When api_user is true, we do not need to update the numerical values
6976      of the temporary storage */
6977   PetscBool reusesym;
6978 
6979   /* support for COO values insertion */
6980   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6981   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6982   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6983   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6984   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6985   PetscMemType mtype;
6986 
6987   /* customization */
6988   PetscBool abmerge;
6989   PetscBool P_oth_bind;
6990 } MatMatMPIAIJBACKEND;
6991 
6992 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6993 {
6994   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6995   PetscInt             i;
6996 
6997   PetscFunctionBegin;
6998   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6999   PetscCall(PetscFree(mmdata->bufa));
7000   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
7001   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
7002   PetscCall(MatDestroy(&mmdata->P_oth));
7003   PetscCall(MatDestroy(&mmdata->Bloc));
7004   PetscCall(PetscSFDestroy(&mmdata->sf));
7005   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
7006   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
7007   PetscCall(PetscFree(mmdata->own[0]));
7008   PetscCall(PetscFree(mmdata->own));
7009   PetscCall(PetscFree(mmdata->off[0]));
7010   PetscCall(PetscFree(mmdata->off));
7011   PetscCall(PetscFree(mmdata));
7012   PetscFunctionReturn(PETSC_SUCCESS);
7013 }
7014 
7015 /* Copy selected n entries with indices in idx[] of A to v[].
7016    If idx is NULL, copy the whole data array of A to v[]
7017  */
7018 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7019 {
7020   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7021 
7022   PetscFunctionBegin;
7023   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7024   if (f) {
7025     PetscCall((*f)(A, n, idx, v));
7026   } else {
7027     const PetscScalar *vv;
7028 
7029     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7030     if (n && idx) {
7031       PetscScalar    *w  = v;
7032       const PetscInt *oi = idx;
7033       PetscInt        j;
7034 
7035       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7036     } else {
7037       PetscCall(PetscArraycpy(v, vv, n));
7038     }
7039     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7040   }
7041   PetscFunctionReturn(PETSC_SUCCESS);
7042 }
7043 
7044 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7045 {
7046   MatMatMPIAIJBACKEND *mmdata;
7047   PetscInt             i, n_d, n_o;
7048 
7049   PetscFunctionBegin;
7050   MatCheckProduct(C, 1);
7051   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7052   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7053   if (!mmdata->reusesym) { /* update temporary matrices */
7054     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7055     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7056   }
7057   mmdata->reusesym = PETSC_FALSE;
7058 
7059   for (i = 0; i < mmdata->cp; i++) {
7060     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7061     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7062   }
7063   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7064     PetscInt noff;
7065 
7066     PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
7067     if (mmdata->mptmp[i]) continue;
7068     if (noff) {
7069       PetscInt nown;
7070 
7071       PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
7072       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7073       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7074       n_o += noff;
7075       n_d += nown;
7076     } else {
7077       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7078 
7079       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7080       n_d += mm->nz;
7081     }
7082   }
7083   if (mmdata->hasoffproc) { /* offprocess insertion */
7084     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7085     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7086   }
7087   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7088   PetscFunctionReturn(PETSC_SUCCESS);
7089 }
7090 
7091 /* Support for Pt * A, A * P, or Pt * A * P */
7092 #define MAX_NUMBER_INTERMEDIATE 4
7093 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7094 {
7095   Mat_Product           *product = C->product;
7096   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7097   Mat_MPIAIJ            *a, *p;
7098   MatMatMPIAIJBACKEND   *mmdata;
7099   ISLocalToGlobalMapping P_oth_l2g = NULL;
7100   IS                     glob      = NULL;
7101   const char            *prefix;
7102   char                   pprefix[256];
7103   const PetscInt        *globidx, *P_oth_idx;
7104   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7105   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7106   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7107                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7108                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7109   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
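  /* Illustration of the map types above (a sketch): with rmapt[i] == 1, local row r of mp[i] maps to global row r plus the
     owning-range start of C's rows; with rmapt[i] == 2, it maps to rmapa[i][r]; cmapt[]/cmapa[] play the same role for columns */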
7110 
7111   MatProductType ptype;
7112   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7113   PetscMPIInt    size;
7114 
7115   PetscFunctionBegin;
7116   MatCheckProduct(C, 1);
7117   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7118   ptype = product->type;
7119   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7120     ptype                                          = MATPRODUCT_AB;
7121     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7122   }
7123   switch (ptype) {
7124   case MATPRODUCT_AB:
7125     A          = product->A;
7126     P          = product->B;
7127     m          = A->rmap->n;
7128     n          = P->cmap->n;
7129     M          = A->rmap->N;
7130     N          = P->cmap->N;
7131     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7132     break;
7133   case MATPRODUCT_AtB:
7134     P          = product->A;
7135     A          = product->B;
7136     m          = P->cmap->n;
7137     n          = A->cmap->n;
7138     M          = P->cmap->N;
7139     N          = A->cmap->N;
7140     hasoffproc = PETSC_TRUE;
7141     break;
7142   case MATPRODUCT_PtAP:
7143     A          = product->A;
7144     P          = product->B;
7145     m          = P->cmap->n;
7146     n          = P->cmap->n;
7147     M          = P->cmap->N;
7148     N          = P->cmap->N;
7149     hasoffproc = PETSC_TRUE;
7150     break;
7151   default:
7152     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7153   }
7154   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7155   if (size == 1) hasoffproc = PETSC_FALSE;
7156 
7157   /* defaults */
7158   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7159     mp[i]    = NULL;
7160     mptmp[i] = PETSC_FALSE;
7161     rmapt[i] = -1;
7162     cmapt[i] = -1;
7163     rmapa[i] = NULL;
7164     cmapa[i] = NULL;
7165   }
7166 
7167   /* customization */
7168   PetscCall(PetscNew(&mmdata));
7169   mmdata->reusesym = product->api_user;
7170   if (ptype == MATPRODUCT_AB) {
7171     if (product->api_user) {
7172       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7173       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7174       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7175       PetscOptionsEnd();
7176     } else {
7177       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7178       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7179       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7180       PetscOptionsEnd();
7181     }
7182   } else if (ptype == MATPRODUCT_PtAP) {
7183     if (product->api_user) {
7184       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7185       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7186       PetscOptionsEnd();
7187     } else {
7188       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7189       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7190       PetscOptionsEnd();
7191     }
7192   }
7193   a = (Mat_MPIAIJ *)A->data;
7194   p = (Mat_MPIAIJ *)P->data;
7195   PetscCall(MatSetSizes(C, m, n, M, N));
7196   PetscCall(PetscLayoutSetUp(C->rmap));
7197   PetscCall(PetscLayoutSetUp(C->cmap));
7198   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7199   PetscCall(MatGetOptionsPrefix(C, &prefix));
7200 
7201   cp = 0;
7202   switch (ptype) {
7203   case MATPRODUCT_AB: /* A * P */
7204     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7205 
7206     /* A_diag * P_local (merged or not) */
7207     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7208       /* P is product->B */
7209       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7210       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7211       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7212       PetscCall(MatProductSetFill(mp[cp], product->fill));
7213       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7214       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7215       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7216       mp[cp]->product->api_user = product->api_user;
7217       PetscCall(MatProductSetFromOptions(mp[cp]));
7218       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7219       PetscCall(ISGetIndices(glob, &globidx));
7220       rmapt[cp] = 1;
7221       cmapt[cp] = 2;
7222       cmapa[cp] = globidx;
7223       mptmp[cp] = PETSC_FALSE;
7224       cp++;
7225     } else { /* A_diag * P_diag and A_diag * P_off */
7226       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7227       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7228       PetscCall(MatProductSetFill(mp[cp], product->fill));
7229       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7230       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7231       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7232       mp[cp]->product->api_user = product->api_user;
7233       PetscCall(MatProductSetFromOptions(mp[cp]));
7234       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7235       rmapt[cp] = 1;
7236       cmapt[cp] = 1;
7237       mptmp[cp] = PETSC_FALSE;
7238       cp++;
7239       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7240       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7241       PetscCall(MatProductSetFill(mp[cp], product->fill));
7242       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7243       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7244       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7245       mp[cp]->product->api_user = product->api_user;
7246       PetscCall(MatProductSetFromOptions(mp[cp]));
7247       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7248       rmapt[cp] = 1;
7249       cmapt[cp] = 2;
7250       cmapa[cp] = p->garray;
7251       mptmp[cp] = PETSC_FALSE;
7252       cp++;
7253     }
7254 
7255     /* A_off * P_other */
7256     if (mmdata->P_oth) {
7257       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7258       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7259       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7260       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7261       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7262       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7263       PetscCall(MatProductSetFill(mp[cp], product->fill));
7264       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7265       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7266       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7267       mp[cp]->product->api_user = product->api_user;
7268       PetscCall(MatProductSetFromOptions(mp[cp]));
7269       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7270       rmapt[cp] = 1;
7271       cmapt[cp] = 2;
7272       cmapa[cp] = P_oth_idx;
7273       mptmp[cp] = PETSC_FALSE;
7274       cp++;
7275     }
7276     break;
7277 
7278   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7279     /* A is product->B */
7280     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7281     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7282       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7283       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7284       PetscCall(MatProductSetFill(mp[cp], product->fill));
7285       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7286       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7287       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7288       mp[cp]->product->api_user = product->api_user;
7289       PetscCall(MatProductSetFromOptions(mp[cp]));
7290       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7291       PetscCall(ISGetIndices(glob, &globidx));
7292       rmapt[cp] = 2;
7293       rmapa[cp] = globidx;
7294       cmapt[cp] = 2;
7295       cmapa[cp] = globidx;
7296       mptmp[cp] = PETSC_FALSE;
7297       cp++;
7298     } else {
7299       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7300       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7301       PetscCall(MatProductSetFill(mp[cp], product->fill));
7302       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7303       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7304       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7305       mp[cp]->product->api_user = product->api_user;
7306       PetscCall(MatProductSetFromOptions(mp[cp]));
7307       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7308       PetscCall(ISGetIndices(glob, &globidx));
7309       rmapt[cp] = 1;
7310       cmapt[cp] = 2;
7311       cmapa[cp] = globidx;
7312       mptmp[cp] = PETSC_FALSE;
7313       cp++;
7314       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7315       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7316       PetscCall(MatProductSetFill(mp[cp], product->fill));
7317       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7318       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7319       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7320       mp[cp]->product->api_user = product->api_user;
7321       PetscCall(MatProductSetFromOptions(mp[cp]));
7322       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7323       rmapt[cp] = 2;
7324       rmapa[cp] = p->garray;
7325       cmapt[cp] = 2;
7326       cmapa[cp] = globidx;
7327       mptmp[cp] = PETSC_FALSE;
7328       cp++;
7329     }
7330     break;
7331   case MATPRODUCT_PtAP:
7332     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7333     /* P is product->B */
7334     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7335     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7336     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7337     PetscCall(MatProductSetFill(mp[cp], product->fill));
7338     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7339     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7340     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7341     mp[cp]->product->api_user = product->api_user;
7342     PetscCall(MatProductSetFromOptions(mp[cp]));
7343     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7344     PetscCall(ISGetIndices(glob, &globidx));
7345     rmapt[cp] = 2;
7346     rmapa[cp] = globidx;
7347     cmapt[cp] = 2;
7348     cmapa[cp] = globidx;
7349     mptmp[cp] = PETSC_FALSE;
7350     cp++;
7351     if (mmdata->P_oth) {
7352       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7353       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7354       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
7355       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7356       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7357       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7358       PetscCall(MatProductSetFill(mp[cp], product->fill));
7359       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7360       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7361       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7362       mp[cp]->product->api_user = product->api_user;
7363       PetscCall(MatProductSetFromOptions(mp[cp]));
7364       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7365       mptmp[cp] = PETSC_TRUE;
7366       cp++;
7367       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7368       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7369       PetscCall(MatProductSetFill(mp[cp], product->fill));
7370       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7371       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7372       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7373       mp[cp]->product->api_user = product->api_user;
7374       PetscCall(MatProductSetFromOptions(mp[cp]));
7375       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7376       rmapt[cp] = 2;
7377       rmapa[cp] = globidx;
7378       cmapt[cp] = 2;
7379       cmapa[cp] = P_oth_idx;
7380       mptmp[cp] = PETSC_FALSE;
7381       cp++;
7382     }
7383     break;
7384   default:
7385     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7386   }
7387   /* sanity check */
7388   if (size > 1)
7389     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7390 
7391   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7392   for (i = 0; i < cp; i++) {
7393     mmdata->mp[i]    = mp[i];
7394     mmdata->mptmp[i] = mptmp[i];
7395   }
7396   mmdata->cp             = cp;
7397   C->product->data       = mmdata;
7398   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7399   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7400 
7401   /* memory type */
7402   mmdata->mtype = PETSC_MEMTYPE_HOST;
7403   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7404   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7405   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7406   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7407   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7408   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7409 
7410   /* prepare coo coordinates for values insertion */
7411 
7412   /* count the total nonzeros of the intermediate seqaij Mats
7413     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7414     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs
7415     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7416   */
7417   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7418     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7419     if (mptmp[cp]) continue;
7420     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7421       const PetscInt *rmap = rmapa[cp];
7422       const PetscInt  mr   = mp[cp]->rmap->n;
7423       const PetscInt  rs   = C->rmap->rstart;
7424       const PetscInt  re   = C->rmap->rend;
7425       const PetscInt *ii   = mm->i;
7426       for (i = 0; i < mr; i++) {
7427         const PetscInt gr = rmap[i];
7428         const PetscInt nz = ii[i + 1] - ii[i];
7429         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7430         else ncoo_oown += nz;                  /* this row is local */
7431       }
7432     } else ncoo_d += mm->nz;
7433   }
7434 
7435   /*
7436     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7437 
7438     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7439 
7440     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7441 
7442     off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes
7443     own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
7444     Thus, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other processes.
7445 
7446     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7447     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7448   */
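  /*
     Worked example (illustration only, with made-up counts): if ncoo_d = 4, ncoo_oown = 3, and other ranks send this
     rank ncoo2 = 2 entries, then ncoo = 9 and coo_i/coo_j/coo_v are laid out as

       [ 7 locally produced entries (ncoo_d + ncoo_oown) | 2 entries received from other ranks ]

     i.e. the entries gathered from remote ranks are appended after the local ones.
  */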
7449   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7450   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7451 
7452   /* gather (i,j) of nonzeros inserted by remote procs */
7453   if (hasoffproc) {
7454     PetscSF  msf;
7455     PetscInt ncoo2, *coo_i2, *coo_j2;
7456 
7457     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7458     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7459     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7460 
7461     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7462       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7463       PetscInt   *idxoff = mmdata->off[cp];
7464       PetscInt   *idxown = mmdata->own[cp];
7465       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7466         const PetscInt *rmap = rmapa[cp];
7467         const PetscInt *cmap = cmapa[cp];
7468         const PetscInt *ii   = mm->i;
7469         PetscInt       *coi  = coo_i + ncoo_o;
7470         PetscInt       *coj  = coo_j + ncoo_o;
7471         const PetscInt  mr   = mp[cp]->rmap->n;
7472         const PetscInt  rs   = C->rmap->rstart;
7473         const PetscInt  re   = C->rmap->rend;
7474         const PetscInt  cs   = C->cmap->rstart;
7475         for (i = 0; i < mr; i++) {
7476           const PetscInt *jj = mm->j + ii[i];
7477           const PetscInt  gr = rmap[i];
7478           const PetscInt  nz = ii[i + 1] - ii[i];
7479           if (gr < rs || gr >= re) { /* this is an offproc row */
7480             for (j = ii[i]; j < ii[i + 1]; j++) {
7481               *coi++    = gr;
7482               *idxoff++ = j;
7483             }
7484             if (!cmapt[cp]) { /* already global */
7485               for (j = 0; j < nz; j++) *coj++ = jj[j];
7486             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7487               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7488             } else { /* offdiag */
7489               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7490             }
7491             ncoo_o += nz;
7492           } else { /* this is a local row */
7493             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7494           }
7495         }
7496       }
7497       mmdata->off[cp + 1] = idxoff;
7498       mmdata->own[cp + 1] = idxown;
7499     }
7500 
7501     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7502     PetscInt incoo_o;
7503     PetscCall(PetscIntCast(ncoo_o, &incoo_o));
7504     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7505     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7506     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
7507     ncoo = ncoo_d + ncoo_oown + ncoo2;
7508     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7509     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7510     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7511     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7512     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7513     PetscCall(PetscFree2(coo_i, coo_j));
7514     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7515     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7516     coo_i = coo_i2;
7517     coo_j = coo_j2;
7518   } else { /* no offproc values insertion */
7519     ncoo = ncoo_d;
7520     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7521 
7522     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7523     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7524     PetscCall(PetscSFSetUp(mmdata->sf));
7525   }
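  /* At this point coo_i/coo_j have room for ncoo entries: the first ncoo_d + ncoo_oown slots are reserved for
     locally produced nonzeros (filled by the loop below) and, when hasoffproc is true, the trailing slots already
     hold the (i,j) of nonzeros gathered from other ranks through mmdata->sf */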
7526   mmdata->hasoffproc = hasoffproc;
7527 
7528   /* gather (i,j) of nonzeros inserted locally */
7529   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7530     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7531     PetscInt       *coi  = coo_i + ncoo_d;
7532     PetscInt       *coj  = coo_j + ncoo_d;
7533     const PetscInt *jj   = mm->j;
7534     const PetscInt *ii   = mm->i;
7535     const PetscInt *cmap = cmapa[cp];
7536     const PetscInt *rmap = rmapa[cp];
7537     const PetscInt  mr   = mp[cp]->rmap->n;
7538     const PetscInt  rs   = C->rmap->rstart;
7539     const PetscInt  re   = C->rmap->rend;
7540     const PetscInt  cs   = C->cmap->rstart;
7541 
7542     if (mptmp[cp]) continue;
7543     if (rmapt[cp] == 1) { /* consecutive rows */
7544       /* fill coo_i */
7545       for (i = 0; i < mr; i++) {
7546         const PetscInt gr = i + rs;
7547         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7548       }
7549       /* fill coo_j */
7550       if (!cmapt[cp]) { /* type-0, already global */
7551         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7552       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7553         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7554       } else {                                            /* type-2, local to global for sparse columns */
7555         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7556       }
7557       ncoo_d += mm->nz;
7558     } else if (rmapt[cp] == 2) { /* sparse rows */
7559       for (i = 0; i < mr; i++) {
7560         const PetscInt *jj = mm->j + ii[i];
7561         const PetscInt  gr = rmap[i];
7562         const PetscInt  nz = ii[i + 1] - ii[i];
7563         if (gr >= rs && gr < re) { /* local rows */
7564           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7565           if (!cmapt[cp]) { /* type-0, already global */
7566             for (j = 0; j < nz; j++) *coj++ = jj[j];
7567           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7568             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7569           } else { /* type-2, local to global for sparse columns */
7570             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7571           }
7572           ncoo_d += nz;
7573         }
7574       }
7575     }
7576   }
7577   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7578   PetscCall(ISDestroy(&glob));
7579   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7580   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7581   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7582   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7583 
7584   /* set block sizes */
7585   A = product->A;
7586   P = product->B;
7587   switch (ptype) {
7588   case MATPRODUCT_PtAP:
7589     PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
7590     break;
7591   case MATPRODUCT_RARt:
7592     PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
7593     break;
7594   case MATPRODUCT_ABC:
7595     PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
7596     break;
7597   case MATPRODUCT_AB:
7598     PetscCall(MatSetBlockSizesFromMats(C, A, P));
7599     break;
7600   case MATPRODUCT_AtB:
7601     PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
7602     break;
7603   case MATPRODUCT_ABt:
7604     PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
7605     break;
7606   default:
7607     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
7608   }
7609 
7610   /* preallocate with COO data */
7611   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7612   PetscCall(PetscFree2(coo_i, coo_j));
7613   PetscFunctionReturn(PETSC_SUCCESS);
7614 }
7615 
7616 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7617 {
7618   Mat_Product *product = mat->product;
7619 #if defined(PETSC_HAVE_DEVICE)
7620   PetscBool match  = PETSC_FALSE;
7621   PetscBool usecpu = PETSC_FALSE;
7622 #else
7623   PetscBool match = PETSC_TRUE;
7624 #endif
7625 
7626   PetscFunctionBegin;
7627   MatCheckProduct(mat, 1);
7628 #if defined(PETSC_HAVE_DEVICE)
7629   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7630   if (match) { /* we can always fallback to the CPU if requested */
7631     switch (product->type) {
7632     case MATPRODUCT_AB:
7633       if (product->api_user) {
7634         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7635         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7636         PetscOptionsEnd();
7637       } else {
7638         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7639         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7640         PetscOptionsEnd();
7641       }
7642       break;
7643     case MATPRODUCT_AtB:
7644       if (product->api_user) {
7645         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7646         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7647         PetscOptionsEnd();
7648       } else {
7649         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7650         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7651         PetscOptionsEnd();
7652       }
7653       break;
7654     case MATPRODUCT_PtAP:
7655       if (product->api_user) {
7656         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7657         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7658         PetscOptionsEnd();
7659       } else {
7660         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7661         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7662         PetscOptionsEnd();
7663       }
7664       break;
7665     default:
7666       break;
7667     }
7668     match = (PetscBool)!usecpu;
7669   }
7670 #endif
7671   if (match) {
7672     switch (product->type) {
7673     case MATPRODUCT_AB:
7674     case MATPRODUCT_AtB:
7675     case MATPRODUCT_PtAP:
7676       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7677       break;
7678     default:
7679       break;
7680     }
7681   }
7682   /* fallback to MPIAIJ ops */
7683   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7684   PetscFunctionReturn(PETSC_SUCCESS);
7685 }
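/* The CPU fallback above is controlled by runtime options: with the MatMatMult()/MatTransposeMatMult()/MatPtAP()
   API one can pass -matmatmult_backend_cpu, -mattransposematmult_backend_cpu or -matptap_backend_cpu, while the
   MatProduct API uses -mat_product_algorithm_backend_cpu for all three product types */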
7686 
7687 /*
7688    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7689 
7690    n - the number of block indices in cc[]
7691    cc - the block indices (must be large enough to contain the indices)
7692 */
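/* Example (illustration only): with bs = 2, a row with column indices {0, 1, 4, 5, 8} collapses to the block
   column indices {0, 2, 4}, since consecutive scalar columns that fall in the same block are merged */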
7693 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7694 {
7695   PetscInt        cnt = -1, nidx, j;
7696   const PetscInt *idx;
7697 
7698   PetscFunctionBegin;
7699   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7700   if (nidx) {
7701     cnt     = 0;
7702     cc[cnt] = idx[0] / bs;
7703     for (j = 1; j < nidx; j++) {
7704       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7705     }
7706   }
7707   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7708   *n = cnt + 1;
7709   PetscFunctionReturn(PETSC_SUCCESS);
7710 }
7711 
7712 /*
7713     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7714 
7715     ncollapsed - the number of block indices
7716     collapsed - the block indices (must be large enough to contain the indices)
7717 */
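/* Example (illustration only): with bs = 3, rows start..start+2 with block column sets {0,2}, {1,2} and {2,5}
   collapse to the merged set {0,1,2,5}; each row is collapsed with MatCollapseRow() and the running result is
   merged with PetscMergeIntArray(), using the caller-provided work arrays w0, w1, w2 */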
7718 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7719 {
7720   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7721 
7722   PetscFunctionBegin;
7723   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7724   for (i = start + 1; i < start + bs; i++) {
7725     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7726     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7727     cprevtmp = cprev;
7728     cprev    = merged;
7729     merged   = cprevtmp;
7730   }
7731   *ncollapsed = nprev;
7732   if (collapsed) *collapsed = cprev;
7733   PetscFunctionReturn(PETSC_SUCCESS);
7734 }
7735 
7736 /*
7737  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7738 
7739  Input Parameters:
7740  + Amat - matrix
7741  . symmetrize - make the result symmetric
7742  - scale - scale with diagonal (see also filter, index_size, and index[], which control optional filtering and which entries within each block are summed)
7743 
7744  Output Parameter:
7745  . a_Gmat - output scalar graph >= 0
7746 
7747 */
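/*
   Usage sketch (illustration only; Gmat here is a hypothetical caller-side variable and Amat is assumed to be an
   assembled (MPI)AIJ matrix, possibly with block size bs > 1):

     Mat Gmat;
     PetscCall(MatCreateGraph_Simple_AIJ(Amat, PETSC_TRUE, PETSC_TRUE, -1.0, 0, NULL, &Gmat));
     ... use Gmat as a scalar, symmetrized, diagonally scaled graph of Amat ...
     PetscCall(MatDestroy(&Gmat));

   Passing index_size = 0 with index = NULL makes every row/column of a block contribute to its scalar value, and a
   negative filter skips the MatFilter() call at the end.
*/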
7748 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
7749 {
7750   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7751   MPI_Comm  comm;
7752   Mat       Gmat;
7753   PetscBool ismpiaij, isseqaij;
7754   Mat       a, b, c;
7755   MatType   jtype;
7756 
7757   PetscFunctionBegin;
7758   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7759   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7760   PetscCall(MatGetSize(Amat, &MM, &NN));
7761   PetscCall(MatGetBlockSize(Amat, &bs));
7762   nloc = (Iend - Istart) / bs;
7763 
7764   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7765   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7766   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7767 
7768   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7769   /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, for which each class could provide a fast
7770      implementation */
7771   if (bs > 1) {
7772     PetscCall(MatGetType(Amat, &jtype));
7773     PetscCall(MatCreate(comm, &Gmat));
7774     PetscCall(MatSetType(Gmat, jtype));
7775     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7776     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7777     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7778       PetscInt  *d_nnz, *o_nnz;
7779       MatScalar *aa, val, *AA;
7780       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7781 
7782       if (isseqaij) {
7783         a = Amat;
7784         b = NULL;
7785       } else {
7786         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7787         a             = d->A;
7788         b             = d->B;
7789       }
7790       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7791       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7792       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7793         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7794         const PetscInt *cols1, *cols2;
7795 
7796         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7797           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7798           nnz[brow / bs] = nc2 / bs;
7799           if (nc2 % bs) ok = 0;
7800           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7801           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7802             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7803             if (nc1 != nc2) ok = 0;
7804             else {
7805               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7806                 if (cols1[jj] != cols2[jj]) ok = 0;
7807                 if (cols1[jj] % bs != jj % bs) ok = 0;
7808               }
7809             }
7810             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7811           }
7812           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7813           if (!ok) {
7814             PetscCall(PetscFree2(d_nnz, o_nnz));
7815             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7816             goto old_bs;
7817           }
7818         }
7819       }
7820       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7821       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7822       PetscCall(PetscFree2(d_nnz, o_nnz));
7823       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7824       // diag
7825       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7826         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7827 
7828         ai = aseq->i;
7829         n  = ai[brow + 1] - ai[brow];
7830         aj = aseq->j + ai[brow];
7831         for (PetscInt k = 0; k < n; k += bs) {   // block columns
7832           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7833           val        = 0;
7834           if (index_size == 0) {
7835             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7836               aa = aseq->a + ai[brow + ii] + k;
7837               for (PetscInt jj = 0; jj < bs; jj++) {    // columns in block
7838                 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7839               }
7840             }
7841           } else {                                            // use (index,index) value if provided
7842             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7843               PetscInt ii = index[iii];
7844               aa          = aseq->a + ai[brow + ii] + k;
7845               for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
7846                 PetscInt jj = index[jjj];
7847                 val += PetscAbs(PetscRealPart(aa[jj]));
7848               }
7849             }
7850           }
7851           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7852           AA[k / bs] = val;
7853         }
7854         grow = Istart / bs + brow / bs;
7855         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
7856       }
7857       // off-diag
7858       if (ismpiaij) {
7859         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7860         const PetscScalar *vals;
7861         const PetscInt    *cols, *garray = aij->garray;
7862 
7863         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7864         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7865           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7866           for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7867             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7868             AA[k / bs] = 0;
7869             AJ[cidx]   = garray[cols[k]] / bs;
7870           }
7871           nc = ncols / bs;
7872           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7873           if (index_size == 0) {
7874             for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
7875               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7876               for (PetscInt k = 0; k < ncols; k += bs) {
7877                 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
7878                   PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
7879                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7880                 }
7881               }
7882               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7883             }
7884           } else {                                            // use (index,index) value if provided
7885             for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
7886               PetscInt ii = index[iii];
7887               PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7888               for (PetscInt k = 0; k < ncols; k += bs) {
7889                 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
7890                   PetscInt jj = index[jjj];
7891                   AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7892                 }
7893               }
7894               PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7895             }
7896           }
7897           grow = Istart / bs + brow / bs;
7898           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
7899         }
7900       }
7901       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7902       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7903       PetscCall(PetscFree2(AA, AJ));
7904     } else {
7905       const PetscScalar *vals;
7906       const PetscInt    *idx;
7907       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7908     old_bs:
7909       /*
7910        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7911        */
7912       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7913       PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
7914       if (isseqaij) {
7915         PetscInt max_d_nnz;
7916 
7917         /*
7918          Determine exact preallocation count for (sequential) scalar matrix
7919          */
7920         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7921         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7922         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7923         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7924         PetscCall(PetscFree3(w0, w1, w2));
7925       } else if (ismpiaij) {
7926         Mat             Daij, Oaij;
7927         const PetscInt *garray;
7928         PetscInt        max_d_nnz;
7929 
7930         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7931         /*
7932          Determine exact preallocation count for diagonal block portion of scalar matrix
7933          */
7934         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7935         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7936         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7937         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7938         PetscCall(PetscFree3(w0, w1, w2));
7939         /*
7940          Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7941          */
7942         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7943           o_nnz[jj] = 0;
7944           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7945             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7946             o_nnz[jj] += ncols;
7947             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7948           }
7949           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7950         }
7951       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7952       /* get scalar copy (norms) of matrix */
7953       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7954       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7955       PetscCall(PetscFree2(d_nnz, o_nnz));
7956       for (Ii = Istart; Ii < Iend; Ii++) {
7957         PetscInt dest_row = Ii / bs;
7958 
7959         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7960         for (jj = 0; jj < ncols; jj++) {
7961           PetscInt    dest_col = idx[jj] / bs;
7962           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7963 
7964           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7965         }
7966         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7967       }
7968       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7969       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7970     }
7971   } else {
7972     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7973     else {
7974       Gmat = Amat;
7975       PetscCall(PetscObjectReference((PetscObject)Gmat));
7976     }
7977     if (isseqaij) {
7978       a = Gmat;
7979       b = NULL;
7980     } else {
7981       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7982       a             = d->A;
7983       b             = d->B;
7984     }
7985     if (filter >= 0 || scale) {
7986       /* take absolute value of each entry */
7987       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7988         MatInfo      info;
7989         PetscScalar *avals;
7990 
7991         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7992         PetscCall(MatSeqAIJGetArray(c, &avals));
7993         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7994         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7995       }
7996     }
7997   }
7998   if (symmetrize) {
7999     PetscBool isset, issym;
8000 
8001     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
8002     if (!isset || !issym) {
8003       Mat matTrans;
8004 
8005       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
8006       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
8007       PetscCall(MatDestroy(&matTrans));
8008     }
8009     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
8010   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8011   if (scale) {
8012     /* symmetrically scale Gmat so that all diagonal values become +1 or -1 */
8013     Vec diag;
8014 
8015     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8016     PetscCall(MatGetDiagonal(Gmat, diag));
8017     PetscCall(VecReciprocal(diag));
8018     PetscCall(VecSqrtAbs(diag));
8019     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8020     PetscCall(VecDestroy(&diag));
8021   }
8022   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8023   if (filter >= 0) {
8024     PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
8025     PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
8026   }
8027   *a_Gmat = Gmat;
8028   PetscFunctionReturn(PETSC_SUCCESS);
8029 }
8030 
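/* Report where the matrix data currently lives: if the diagonal (A) and off-diagonal (B) blocks agree on a memory
   type, that type is returned; otherwise PETSC_MEMTYPE_HOST is returned as a safe default */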
8031 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype)
8032 {
8033   Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
8034   PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST;
8035 
8036   PetscFunctionBegin;
8037   if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD));
8038   if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO));
8039   *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST;
8040   PetscFunctionReturn(PETSC_SUCCESS);
8041 }
8042 
8043 /*
8044     Special version for direct calls from Fortran
8045 */
8046 
8047 /* Change these macros so they can be used in a void function */
8048 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8049 #undef PetscCall
8050 #define PetscCall(...) \
8051   do { \
8052     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8053     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8054       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8055       return; \
8056     } \
8057   } while (0)
8058 
8059 #undef SETERRQ
8060 #define SETERRQ(comm, ierr, ...) \
8061   do { \
8062     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8063     return; \
8064   } while (0)
8065 
8066 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8067   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8068 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8069   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8070 #else
8071 #endif
8072 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8073 {
8074   Mat         mat = *mmat;
8075   PetscInt    m = *mm, n = *mn;
8076   InsertMode  addv = *maddv;
8077   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8078   PetscScalar value;
8079 
8080   MatCheckPreallocated(mat, 1);
8081   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8082   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8083   {
8084     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8085     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8086     PetscBool roworiented = aij->roworiented;
8087 
8088     /* Some Variables required in the macro */
8089     Mat         A     = aij->A;
8090     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8091     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8092     MatScalar  *aa;
8093     PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8094     Mat         B                 = aij->B;
8095     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8096     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8097     MatScalar  *ba;
8098     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8099      * cannot use "#if defined" inside a macro. */
8100     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8101 
8102     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8103     PetscInt   nonew = a->nonew;
8104     MatScalar *ap1, *ap2;
8105 
8106     PetscFunctionBegin;
8107     PetscCall(MatSeqAIJGetArray(A, &aa));
8108     PetscCall(MatSeqAIJGetArray(B, &ba));
8109     for (i = 0; i < m; i++) {
8110       if (im[i] < 0) continue;
8111       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8112       if (im[i] >= rstart && im[i] < rend) {
8113         row      = im[i] - rstart;
8114         lastcol1 = -1;
8115         rp1      = aj + ai[row];
8116         ap1      = aa + ai[row];
8117         rmax1    = aimax[row];
8118         nrow1    = ailen[row];
8119         low1     = 0;
8120         high1    = nrow1;
8121         lastcol2 = -1;
8122         rp2      = bj + bi[row];
8123         ap2      = ba + bi[row];
8124         rmax2    = bimax[row];
8125         nrow2    = bilen[row];
8126         low2     = 0;
8127         high2    = nrow2;
8128 
8129         for (j = 0; j < n; j++) {
8130           if (roworiented) value = v[i * n + j];
8131           else value = v[i + j * m];
8132           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8133           if (in[j] >= cstart && in[j] < cend) {
8134             col = in[j] - cstart;
8135             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8136           } else if (in[j] < 0) continue;
8137           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8138             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8139           } else {
8140             if (mat->was_assembled) {
8141               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8142 #if defined(PETSC_USE_CTABLE)
8143               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8144               col--;
8145 #else
8146               col = aij->colmap[in[j]] - 1;
8147 #endif
8148               if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
8149                 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
8150                 col = in[j];
8151                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8152                 B        = aij->B;
8153                 b        = (Mat_SeqAIJ *)B->data;
8154                 bimax    = b->imax;
8155                 bi       = b->i;
8156                 bilen    = b->ilen;
8157                 bj       = b->j;
8158                 rp2      = bj + bi[row];
8159                 ap2      = ba + bi[row];
8160                 rmax2    = bimax[row];
8161                 nrow2    = bilen[row];
8162                 low2     = 0;
8163                 high2    = nrow2;
8164                 bm       = aij->B->rmap->n;
8165                 ba       = b->a;
8166                 inserted = PETSC_FALSE;
8167               }
8168             } else col = in[j];
8169             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8170           }
8171         }
8172       } else if (!aij->donotstash) {
8173         if (roworiented) {
8174           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8175         } else {
8176           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8177         }
8178       }
8179     }
8180     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8181     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8182   }
8183   PetscFunctionReturnVoid();
8184 }
8185 
8186 /* Undefining these here since they were redefined from their original definition above! No
8187  * other PETSc functions should be defined past this point, as it is impossible to recover the
8188  * original definitions */
8189 #undef PetscCall
8190 #undef SETERRQ
8191