xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 0462cc06d86a4b04d8da7c4dbbe0d29bc6def07a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14 #if defined(PETSC_USE_LOG)
15   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
16 #endif
17   PetscCall(MatStashDestroy_Private(&mat->stash));
18   PetscCall(VecDestroy(&aij->diag));
19   PetscCall(MatDestroy(&aij->A));
20   PetscCall(MatDestroy(&aij->B));
21 #if defined(PETSC_USE_CTABLE)
22   PetscCall(PetscHMapIDestroy(&aij->colmap));
23 #else
24   PetscCall(PetscFree(aij->colmap));
25 #endif
26   PetscCall(PetscFree(aij->garray));
27   PetscCall(VecDestroy(&aij->lvec));
28   PetscCall(VecScatterDestroy(&aij->Mvctx));
29   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
30   PetscCall(PetscFree(aij->ld));
31 
32   /* Free COO */
33   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
34 
35   PetscCall(PetscFree(mat->data));
36 
37   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
38   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
39 
40   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
50 #if defined(PETSC_HAVE_CUDA)
51   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
52 #endif
53 #if defined(PETSC_HAVE_HIP)
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
55 #endif
56 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
58 #endif
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
60 #if defined(PETSC_HAVE_ELEMENTAL)
61   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
62 #endif
63 #if defined(PETSC_HAVE_SCALAPACK)
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
65 #endif
66 #if defined(PETSC_HAVE_HYPRE)
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
69 #endif
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
76 #if defined(PETSC_HAVE_MKL_SPARSE)
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
78 #endif
79   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
82   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
84   PetscFunctionReturn(PETSC_SUCCESS);
85 }
86 
87 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
88 #define TYPE AIJ
89 #define TYPE_AIJ
90 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
91 #undef TYPE
92 #undef TYPE_AIJ
93 
94 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
95 {
96   Mat B;
97 
98   PetscFunctionBegin;
99   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
100   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
101   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
102   PetscCall(MatDestroy(&B));
103   PetscFunctionReturn(PETSC_SUCCESS);
104 }
105 
106 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
107 {
108   Mat B;
109 
110   PetscFunctionBegin;
111   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
112   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
113   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 /*MC
118    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
119 
120    This matrix type is identical to `MATSEQAIJ` when constructed with a single-process communicator,
121    and `MATMPIAIJ` otherwise.  As a result, for single-process communicators,
122   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
123   for communicators controlling multiple processes.  It is recommended that you call both of
124   the above preallocation routines for simplicity.
125 
126    Options Database Key:
127 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
128 
129   Level: beginner
130 
131   Developer Note:
132   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
133   enough exist.
134 
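  Example Usage:
  A minimal sketch (not taken from this file; `N` is a placeholder for the global size and the
  per-row nonzero estimates 5 and 2 are purely illustrative) following the recommendation above
  to call both preallocation routines, so the same code runs on one or many MPI processes:
.vb
  Mat A;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
  PetscCall(MatSetType(A, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          /* used when the communicator has one process */
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL)); /* used when it has more than one */
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
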
135 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
136 M*/
137 
138 /*MC
139    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
140 
141    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single-process communicator,
142    and `MATMPIAIJCRL` otherwise.  As a result, for single-process communicators,
143    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
144   for communicators controlling multiple processes.  It is recommended that you call both of
145   the above preallocation routines for simplicity.
146 
147    Options Database Key:
148 . -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`
149 
150   Level: beginner
151 
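  Example Usage:
  A minimal sketch (not taken from this file; `N` is a placeholder for the global size). The same
  type can instead be selected at run time by calling `MatSetFromOptions()` and passing -mat_type aijcrl:
.vb
  Mat A;
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
  PetscCall(MatSetType(A, MATAIJCRL));
  PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
.ve
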
152 .seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
153 M*/
154 
155 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
156 {
157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
158 
159   PetscFunctionBegin;
160 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
161   A->boundtocpu = flg;
162 #endif
163   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
164   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
165 
166   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
167    * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
168    * to differ from the parent matrix. */
169   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
170   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
171 
172   PetscFunctionReturn(PETSC_SUCCESS);
173 }
174 
175 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
176 {
177   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
178 
179   PetscFunctionBegin;
180   if (mat->A) {
181     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
182     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
188 {
189   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
190   Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
191   Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
192   const PetscInt  *ia, *ib;
193   const MatScalar *aa, *bb, *aav, *bav;
194   PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
195   PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;
196 
197   PetscFunctionBegin;
198   *keptrows = NULL;
199 
200   ia = a->i;
201   ib = b->i;
202   PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
203   PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
204   for (i = 0; i < m; i++) {
205     na = ia[i + 1] - ia[i];
206     nb = ib[i + 1] - ib[i];
207     if (!na && !nb) {
208       cnt++;
209       goto ok1;
210     }
211     aa = aav + ia[i];
212     for (j = 0; j < na; j++) {
213       if (aa[j] != 0.0) goto ok1;
214     }
215     bb = bav + ib[i];
216     for (j = 0; j < nb; j++) {
217       if (bb[j] != 0.0) goto ok1;
218     }
219     cnt++;
220   ok1:;
221   }
222   PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
223   if (!n0rows) {
224     PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
225     PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
226     PetscFunctionReturn(PETSC_SUCCESS);
227   }
228   PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
229   cnt = 0;
230   for (i = 0; i < m; i++) {
231     na = ia[i + 1] - ia[i];
232     nb = ib[i + 1] - ib[i];
233     if (!na && !nb) continue;
234     aa = aav + ia[i];
235     for (j = 0; j < na; j++) {
236       if (aa[j] != 0.0) {
237         rows[cnt++] = rstart + i;
238         goto ok2;
239       }
240     }
241     bb = bav + ib[i];
242     for (j = 0; j < nb; j++) {
243       if (bb[j] != 0.0) {
244         rows[cnt++] = rstart + i;
245         goto ok2;
246       }
247     }
248   ok2:;
249   }
250   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
251   PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
252   PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
253   PetscFunctionReturn(PETSC_SUCCESS);
254 }
255 
256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
257 {
258   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
259   PetscBool   cong;
260 
261   PetscFunctionBegin;
262   PetscCall(MatHasCongruentLayouts(Y, &cong));
263   if (Y->assembled && cong) {
264     PetscCall(MatDiagonalSet(aij->A, D, is));
265   } else {
266     PetscCall(MatDiagonalSet_Default(Y, D, is));
267   }
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
272 {
273   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
274   PetscInt    i, rstart, nrows, *rows;
275 
276   PetscFunctionBegin;
277   *zrows = NULL;
278   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
279   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
280   for (i = 0; i < nrows; i++) rows[i] += rstart;
281   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
285 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
286 {
287   Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
288   PetscInt           i, m, n, *garray = aij->garray;
289   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
290   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
291   PetscReal         *work;
292   const PetscScalar *dummy;
293 
294   PetscFunctionBegin;
295   PetscCall(MatGetSize(A, &m, &n));
296   PetscCall(PetscCalloc1(n, &work));
297   PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
298   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
299   PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
300   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
301   if (type == NORM_2) {
302     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
303     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
304   } else if (type == NORM_1) {
305     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
306     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
307   } else if (type == NORM_INFINITY) {
308     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
309     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
310   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
311     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
312     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
313   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
314     for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
315     for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
316   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
317   if (type == NORM_INFINITY) {
318     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
319   } else {
320     PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
321   }
322   PetscCall(PetscFree(work));
323   if (type == NORM_2) {
324     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
325   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
326     for (i = 0; i < n; i++) reductions[i] /= m;
327   }
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
332 {
333   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
334   IS              sis, gis;
335   const PetscInt *isis, *igis;
336   PetscInt        n, *iis, nsis, ngis, rstart, i;
337 
338   PetscFunctionBegin;
339   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
340   PetscCall(MatFindNonzeroRows(a->B, &gis));
341   PetscCall(ISGetSize(gis, &ngis));
342   PetscCall(ISGetSize(sis, &nsis));
343   PetscCall(ISGetIndices(sis, &isis));
344   PetscCall(ISGetIndices(gis, &igis));
345 
346   PetscCall(PetscMalloc1(ngis + nsis, &iis));
347   PetscCall(PetscArraycpy(iis, igis, ngis));
348   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
349   n = ngis + nsis;
350   PetscCall(PetscSortRemoveDupsInt(&n, iis));
351   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
352   for (i = 0; i < n; i++) iis[i] += rstart;
353   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
354 
355   PetscCall(ISRestoreIndices(sis, &isis));
356   PetscCall(ISRestoreIndices(gis, &igis));
357   PetscCall(ISDestroy(&sis));
358   PetscCall(ISDestroy(&gis));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
366 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
367 stores an integer array of order N) but is fast to access.  Values are stored shifted by one, so 0 means "column not present".
368 */
369 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
370 {
371   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
372   PetscInt    n   = aij->B->cmap->n, i;
373 
374   PetscFunctionBegin;
375   PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
376 #if defined(PETSC_USE_CTABLE)
377   PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
378   for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
379 #else
380   PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
381   for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
382 #endif
383   PetscFunctionReturn(PETSC_SUCCESS);
384 }
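
/*
   Illustrative sketch only (mirrors the lookups done later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()):
   translating a global column index gcol into a local column index lcol of the off-diagonal block using the
   colmap built above. Since stored values are shifted by one, a result of -1 means "column not present".

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     PetscCall(PetscHMapIGetWithDefault(aij->colmap, gcol + 1, 0, &lcol));
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/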
385 
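/* The two macros below insert a single (row, col, value) pair into the diagonal (A) or off-diagonal (B)
   SeqAIJ block on behalf of MatSetValues_MPIAIJ(): a short binary search narrows the column range within
   the row, an existing entry is updated in place (insert or add), and otherwise a new entry is inserted,
   shifting the rest of the row and reallocating via MatSeqXAIJReallocateAIJ() when the row has no free slot. */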
386 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
387   { \
388     if (col <= lastcol1) low1 = 0; \
389     else high1 = nrow1; \
390     lastcol1 = col; \
391     while (high1 - low1 > 5) { \
392       t = (low1 + high1) / 2; \
393       if (rp1[t] > col) high1 = t; \
394       else low1 = t; \
395     } \
396     for (_i = low1; _i < high1; _i++) { \
397       if (rp1[_i] > col) break; \
398       if (rp1[_i] == col) { \
399         if (addv == ADD_VALUES) { \
400           ap1[_i] += value; \
401           /* Not sure whether LogFlops will slow down the code or not */ \
402           (void)PetscLogFlops(1.0); \
403         } else ap1[_i] = value; \
404         goto a_noinsert; \
405       } \
406     } \
407     if (value == 0.0 && ignorezeroentries && row != col) { \
408       low1  = 0; \
409       high1 = nrow1; \
410       goto a_noinsert; \
411     } \
412     if (nonew == 1) { \
413       low1  = 0; \
414       high1 = nrow1; \
415       goto a_noinsert; \
416     } \
417     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
418     MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
419     N = nrow1++ - 1; \
420     a->nz++; \
421     high1++; \
422     /* shift up all the later entries in this row */ \
423     PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
424     PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
425     rp1[_i] = col; \
426     ap1[_i] = value; \
427     A->nonzerostate++; \
428   a_noinsert:; \
429     ailen[row] = nrow1; \
430   }
431 
432 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
433   { \
434     if (col <= lastcol2) low2 = 0; \
435     else high2 = nrow2; \
436     lastcol2 = col; \
437     while (high2 - low2 > 5) { \
438       t = (low2 + high2) / 2; \
439       if (rp2[t] > col) high2 = t; \
440       else low2 = t; \
441     } \
442     for (_i = low2; _i < high2; _i++) { \
443       if (rp2[_i] > col) break; \
444       if (rp2[_i] == col) { \
445         if (addv == ADD_VALUES) { \
446           ap2[_i] += value; \
447           (void)PetscLogFlops(1.0); \
448         } else ap2[_i] = value; \
449         goto b_noinsert; \
450       } \
451     } \
452     if (value == 0.0 && ignorezeroentries) { \
453       low2  = 0; \
454       high2 = nrow2; \
455       goto b_noinsert; \
456     } \
457     if (nonew == 1) { \
458       low2  = 0; \
459       high2 = nrow2; \
460       goto b_noinsert; \
461     } \
462     PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
463     MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
464     N = nrow2++ - 1; \
465     b->nz++; \
466     high2++; \
467     /* shift up all the later entries in this row */ \
468     PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
469     PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
470     rp2[_i] = col; \
471     ap2[_i] = value; \
472     B->nonzerostate++; \
473   b_noinsert:; \
474     bilen[row] = nrow2; \
475   }
476 
477 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
478 {
479   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
480   Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
481   PetscInt     l, *garray                         = mat->garray, diag;
482   PetscScalar *aa, *ba;
483 
484   PetscFunctionBegin;
485   /* code only works for square matrices A; v must contain the row's nonzero values ordered by increasing global column, matching the existing nonzero pattern */
486 
487   /* find size of row to the left of the diagonal part */
488   PetscCall(MatGetOwnershipRange(A, &diag, NULL));
489   row = row - diag;
490   for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
491     if (garray[b->j[b->i[row] + l]] > diag) break;
492   }
493   if (l) {
494     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
495     PetscCall(PetscArraycpy(ba + b->i[row], v, l));
496     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
497   }
498 
499   /* diagonal part */
500   if (a->i[row + 1] - a->i[row]) {
501     PetscCall(MatSeqAIJGetArray(mat->A, &aa));
502     PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
503     PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
504   }
505 
506   /* right of diagonal part */
507   if (b->i[row + 1] - b->i[row] - l) {
508     PetscCall(MatSeqAIJGetArray(mat->B, &ba));
509     PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
510     PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
511   }
512   PetscFunctionReturn(PETSC_SUCCESS);
513 }
514 
515 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
516 {
517   Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
518   PetscScalar value = 0.0;
519   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
520   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
521   PetscBool   roworiented = aij->roworiented;
522 
523   /* Some Variables required in the macro */
524   Mat         A     = aij->A;
525   Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
526   PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
527   PetscBool   ignorezeroentries = a->ignorezeroentries;
528   Mat         B                 = aij->B;
529   Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
530   PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
531   MatScalar  *aa, *ba;
532   PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
533   PetscInt    nonew;
534   MatScalar  *ap1, *ap2;
535 
536   PetscFunctionBegin;
537   PetscCall(MatSeqAIJGetArray(A, &aa));
538   PetscCall(MatSeqAIJGetArray(B, &ba));
539   for (i = 0; i < m; i++) {
540     if (im[i] < 0) continue;
541     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
542     if (im[i] >= rstart && im[i] < rend) {
543       row      = im[i] - rstart;
544       lastcol1 = -1;
545       rp1      = aj + ai[row];
546       ap1      = aa + ai[row];
547       rmax1    = aimax[row];
548       nrow1    = ailen[row];
549       low1     = 0;
550       high1    = nrow1;
551       lastcol2 = -1;
552       rp2      = bj + bi[row];
553       ap2      = ba + bi[row];
554       rmax2    = bimax[row];
555       nrow2    = bilen[row];
556       low2     = 0;
557       high2    = nrow2;
558 
559       for (j = 0; j < n; j++) {
560         if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
561         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
562         if (in[j] >= cstart && in[j] < cend) {
563           col   = in[j] - cstart;
564           nonew = a->nonew;
565           MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
566         } else if (in[j] < 0) {
567           continue;
568         } else {
569           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
570           if (mat->was_assembled) {
571             if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
572 #if defined(PETSC_USE_CTABLE)
573             PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
574             col--;
575 #else
576             col = aij->colmap[in[j]] - 1;
577 #endif
578             if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
579               PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
580               col = in[j];
581               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
582               B     = aij->B;
583               b     = (Mat_SeqAIJ *)B->data;
584               bimax = b->imax;
585               bi    = b->i;
586               bilen = b->ilen;
587               bj    = b->j;
588               ba    = b->a;
589               rp2   = bj + bi[row];
590               ap2   = ba + bi[row];
591               rmax2 = bimax[row];
592               nrow2 = bilen[row];
593               low2  = 0;
594               high2 = nrow2;
595               bm    = aij->B->rmap->n;
596               ba    = b->a;
597             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
598               if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
599                 PetscCall(PetscInfo(mat, "Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
600               } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
601             }
602           } else col = in[j];
603           nonew = b->nonew;
604           MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
605         }
606       }
607     } else {
608       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
609       if (!aij->donotstash) {
610         mat->assembled = PETSC_FALSE;
611         if (roworiented) {
612           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
613         } else {
614           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
615         }
616       }
617     }
618   }
619   PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above, but we do not access them here */
620   PetscCall(MatSeqAIJRestoreArray(B, &ba));
621   PetscFunctionReturn(PETSC_SUCCESS);
622 }
623 
624 /*
625     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
626     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
627     No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
628 */
629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
630 {
631   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
632   Mat         A      = aij->A; /* diagonal part of the matrix */
633   Mat         B      = aij->B; /* offdiagonal part of the matrix */
634   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
635   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
636   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
637   PetscInt   *ailen = a->ilen, *aj = a->j;
638   PetscInt   *bilen = b->ilen, *bj = b->j;
639   PetscInt    am          = aij->A->rmap->n, j;
640   PetscInt    diag_so_far = 0, dnz;
641   PetscInt    offd_so_far = 0, onz;
642 
643   PetscFunctionBegin;
644   /* Iterate over all rows of the matrix */
645   for (j = 0; j < am; j++) {
646     dnz = onz = 0;
647     /*  Iterate over all non-zero columns of the current row */
648     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
649       /* If column is in the diagonal */
650       if (mat_j[col] >= cstart && mat_j[col] < cend) {
651         aj[diag_so_far++] = mat_j[col] - cstart;
652         dnz++;
653       } else { /* off-diagonal entries */
654         bj[offd_so_far++] = mat_j[col];
655         onz++;
656       }
657     }
658     ailen[j] = dnz;
659     bilen[j] = onz;
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
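
/*
   Worked example (illustration only, not part of the original source) for
   MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic() above and MatSetValues_MPIAIJ_CopyFromCSRFormat() below:
   suppose this process owns columns [cstart, cend) = [2, 5) and receives the local two-row CSR input

     mat_i[] = {0, 3, 5}
     mat_j[] = {2, 3, 7,  0, 4}    (columns sorted within each row)

   Row 0: columns 2 and 3 fall inside [2, 5) and go to the diagonal block as local columns 0 and 1
   (shifted by -cstart), while column 7 goes to the off-diagonal block unshifted.  Row 1: column 4 goes
   to the diagonal block as local column 2 and column 0 to the off-diagonal block.  Hence ailen = {2, 1}
   and bilen = {1, 1}.
*/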
663 
664 /*
665     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
666     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
667     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
668     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
669     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
670 */
671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
672 {
673   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
674   Mat          A    = aij->A; /* diagonal part of the matrix */
675   Mat          B    = aij->B; /* offdiagonal part of the matrix */
676   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
677   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
678   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
679   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
680   PetscInt    *ailen = a->ilen, *aj = a->j;
681   PetscInt    *bilen = b->ilen, *bj = b->j;
682   PetscInt     am          = aij->A->rmap->n, j;
683   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
684   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
685   PetscScalar *aa = a->a, *ba = b->a;
686 
687   PetscFunctionBegin;
688   /* Iterate over all rows of the matrix */
689   for (j = 0; j < am; j++) {
690     dnz_row = onz_row = 0;
691     rowstart_offd     = full_offd_i[j];
692     rowstart_diag     = full_diag_i[j];
693     /*  Iterate over all non-zero columns of the current row */
694     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
695       /* If column is in the diagonal */
696       if (mat_j[col] >= cstart && mat_j[col] < cend) {
697         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
698         aa[rowstart_diag + dnz_row] = mat_a[col];
699         dnz_row++;
700       } else { /* off-diagonal entries */
701         bj[rowstart_offd + onz_row] = mat_j[col];
702         ba[rowstart_offd + onz_row] = mat_a[col];
703         onz_row++;
704       }
705     }
706     ailen[j] = dnz_row;
707     bilen[j] = onz_row;
708   }
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
712 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
713 {
714   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
715   PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
716   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
717 
718   PetscFunctionBegin;
719   for (i = 0; i < m; i++) {
720     if (idxm[i] < 0) continue; /* negative row */
721     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
722     if (idxm[i] >= rstart && idxm[i] < rend) {
723       row = idxm[i] - rstart;
724       for (j = 0; j < n; j++) {
725         if (idxn[j] < 0) continue; /* negative column */
726         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
727         if (idxn[j] >= cstart && idxn[j] < cend) {
728           col = idxn[j] - cstart;
729           PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
730         } else {
731           if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
732 #if defined(PETSC_USE_CTABLE)
733           PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
734           col--;
735 #else
736           col = aij->colmap[idxn[j]] - 1;
737 #endif
738           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
739           else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
740         }
741       }
742     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
743   }
744   PetscFunctionReturn(PETSC_SUCCESS);
745 }
746 
747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
748 {
749   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
750   PetscInt    nstash, reallocs;
751 
752   PetscFunctionBegin;
753   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
754 
755   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
756   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
757   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
761 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
762 {
763   Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
764   PetscMPIInt  n;
765   PetscInt     i, j, rstart, ncols, flg;
766   PetscInt    *row, *col;
767   PetscBool    other_disassembled;
768   PetscScalar *val;
769 
770   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
771 
772   PetscFunctionBegin;
773   if (!aij->donotstash && !mat->nooffprocentries) {
774     while (1) {
775       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
776       if (!flg) break;
777 
778       for (i = 0; i < n;) {
779         /* Now identify the consecutive vals belonging to the same row */
780         for (j = i, rstart = row[j]; j < n; j++) {
781           if (row[j] != rstart) break;
782         }
783         if (j < n) ncols = j - i;
784         else ncols = n - i;
785         /* Now assemble all these values with a single function call */
786         PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
787         i = j;
788       }
789     }
790     PetscCall(MatStashScatterEnd_Private(&mat->stash));
791   }
792 #if defined(PETSC_HAVE_DEVICE)
793   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
794   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
795   if (mat->boundtocpu) {
796     PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
797     PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
798   }
799 #endif
800   PetscCall(MatAssemblyBegin(aij->A, mode));
801   PetscCall(MatAssemblyEnd(aij->A, mode));
802 
803   /* Determine whether any process has disassembled; if so, we must also
804      disassemble ourselves so that we may reassemble. */
805   /*
806      If the nonzero structure of the submatrix B cannot change, then we know that
807      no process disassembled, and we can skip this step.
808   */
809   if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
810     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
811     if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
812       PetscCall(MatDisAssemble_MPIAIJ(mat));
813     }
814   }
815   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
816   PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
817 #if defined(PETSC_HAVE_DEVICE)
818   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
819 #endif
820   PetscCall(MatAssemblyBegin(aij->B, mode));
821   PetscCall(MatAssemblyEnd(aij->B, mode));
822 
823   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
824 
825   aij->rowvalues = NULL;
826 
827   PetscCall(VecDestroy(&aij->diag));
828 
829   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
830   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
831     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
832     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
833   }
834 #if defined(PETSC_HAVE_DEVICE)
835   mat->offloadmask = PETSC_OFFLOAD_BOTH;
836 #endif
837   PetscFunctionReturn(PETSC_SUCCESS);
838 }
839 
840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
841 {
842   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
843 
844   PetscFunctionBegin;
845   PetscCall(MatZeroEntries(l->A));
846   PetscCall(MatZeroEntries(l->B));
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
850 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
851 {
852   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
853   PetscObjectState sA, sB;
854   PetscInt        *lrows;
855   PetscInt         r, len;
856   PetscBool        cong, lch, gch;
857 
858   PetscFunctionBegin;
859   /* get locally owned rows */
860   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
861   PetscCall(MatHasCongruentLayouts(A, &cong));
862   /* fix right hand side if needed */
863   if (x && b) {
864     const PetscScalar *xx;
865     PetscScalar       *bb;
866 
867     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
868     PetscCall(VecGetArrayRead(x, &xx));
869     PetscCall(VecGetArray(b, &bb));
870     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
871     PetscCall(VecRestoreArrayRead(x, &xx));
872     PetscCall(VecRestoreArray(b, &bb));
873   }
874 
875   sA = mat->A->nonzerostate;
876   sB = mat->B->nonzerostate;
877 
878   if (diag != 0.0 && cong) {
879     PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
880     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
881   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
882     Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
883     Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
884     PetscInt    nnwA, nnwB;
885     PetscBool   nnzA, nnzB;
886 
887     nnwA = aijA->nonew;
888     nnwB = aijB->nonew;
889     nnzA = aijA->keepnonzeropattern;
890     nnzB = aijB->keepnonzeropattern;
891     if (!nnzA) {
892       PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
893       aijA->nonew = 0;
894     }
895     if (!nnzB) {
896       PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
897       aijB->nonew = 0;
898     }
899     /* Must zero here before the next loop */
900     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
901     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
902     for (r = 0; r < len; ++r) {
903       const PetscInt row = lrows[r] + A->rmap->rstart;
904       if (row >= A->cmap->N) continue;
905       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
906     }
907     aijA->nonew = nnwA;
908     aijB->nonew = nnwB;
909   } else {
910     PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
911     PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
912   }
913   PetscCall(PetscFree(lrows));
914   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
915   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
916 
917   /* reduce nonzerostate */
918   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
919   PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
920   if (gch) A->nonzerostate++;
921   PetscFunctionReturn(PETSC_SUCCESS);
922 }
923 
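/* Sketch of the algorithm below, for orientation: the requested global rows are mapped to their owning
   process with a PetscSF reduction; the owned rows/columns of the diagonal block are handled by
   MatZeroRowsColumns() on l->A; a 0/1 mask vector scattered into the ghost (lvec) layout marks which
   off-diagonal columns must be zeroed; those entries of l->B are then cleared (optionally updating the
   right-hand side b from x) in either compressed-row or regular storage. */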
924 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
925 {
926   Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
927   PetscMPIInt        n = A->rmap->n;
928   PetscInt           i, j, r, m, len = 0;
929   PetscInt          *lrows, *owners = A->rmap->range;
930   PetscMPIInt        p = 0;
931   PetscSFNode       *rrows;
932   PetscSF            sf;
933   const PetscScalar *xx;
934   PetscScalar       *bb, *mask, *aij_a;
935   Vec                xmask, lmask;
936   Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
937   const PetscInt    *aj, *ii, *ridx;
938   PetscScalar       *aa;
939 
940   PetscFunctionBegin;
941   /* Create SF where leaves are input rows and roots are owned rows */
942   PetscCall(PetscMalloc1(n, &lrows));
943   for (r = 0; r < n; ++r) lrows[r] = -1;
944   PetscCall(PetscMalloc1(N, &rrows));
945   for (r = 0; r < N; ++r) {
946     const PetscInt idx = rows[r];
947     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
948     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
949       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
950     }
951     rrows[r].rank  = p;
952     rrows[r].index = rows[r] - owners[p];
953   }
954   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
955   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
956   /* Collect flags for rows to be zeroed */
957   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
958   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
959   PetscCall(PetscSFDestroy(&sf));
960   /* Compress and put in row numbers */
961   for (r = 0; r < n; ++r)
962     if (lrows[r] >= 0) lrows[len++] = r;
963   /* zero diagonal part of matrix */
964   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
965   /* handle off diagonal part of matrix */
966   PetscCall(MatCreateVecs(A, &xmask, NULL));
967   PetscCall(VecDuplicate(l->lvec, &lmask));
968   PetscCall(VecGetArray(xmask, &bb));
969   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
970   PetscCall(VecRestoreArray(xmask, &bb));
971   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
972   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
973   PetscCall(VecDestroy(&xmask));
974   if (x && b) { /* this code is buggy when the row and column layout don't match */
975     PetscBool cong;
976 
977     PetscCall(MatHasCongruentLayouts(A, &cong));
978     PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
979     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
980     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
981     PetscCall(VecGetArrayRead(l->lvec, &xx));
982     PetscCall(VecGetArray(b, &bb));
983   }
984   PetscCall(VecGetArray(lmask, &mask));
985   /* remove zeroed rows of off diagonal matrix */
986   PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
987   ii = aij->i;
988   for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
989   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
990   if (aij->compressedrow.use) {
991     m    = aij->compressedrow.nrows;
992     ii   = aij->compressedrow.i;
993     ridx = aij->compressedrow.rindex;
994     for (i = 0; i < m; i++) {
995       n  = ii[i + 1] - ii[i];
996       aj = aij->j + ii[i];
997       aa = aij_a + ii[i];
998 
999       for (j = 0; j < n; j++) {
1000         if (PetscAbsScalar(mask[*aj])) {
1001           if (b) bb[*ridx] -= *aa * xx[*aj];
1002           *aa = 0.0;
1003         }
1004         aa++;
1005         aj++;
1006       }
1007       ridx++;
1008     }
1009   } else { /* do not use compressed row format */
1010     m = l->B->rmap->n;
1011     for (i = 0; i < m; i++) {
1012       n  = ii[i + 1] - ii[i];
1013       aj = aij->j + ii[i];
1014       aa = aij_a + ii[i];
1015       for (j = 0; j < n; j++) {
1016         if (PetscAbsScalar(mask[*aj])) {
1017           if (b) bb[i] -= *aa * xx[*aj];
1018           *aa = 0.0;
1019         }
1020         aa++;
1021         aj++;
1022       }
1023     }
1024   }
1025   if (x && b) {
1026     PetscCall(VecRestoreArray(b, &bb));
1027     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
1028   }
1029   PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
1030   PetscCall(VecRestoreArray(lmask, &mask));
1031   PetscCall(VecDestroy(&lmask));
1032   PetscCall(PetscFree(lrows));
1033 
1034   /* only change matrix nonzero state if pattern was allowed to be changed */
1035   if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
1036     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1037     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1038   }
1039   PetscFunctionReturn(PETSC_SUCCESS);
1040 }
1041 
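/* y = A x for the MPIAIJ layout: start scattering the needed ghost values of x into a->lvec, overlap that
   communication with the purely local product a->A * x, then finish the scatter and add the off-diagonal
   contribution a->B * lvec into y. */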
1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1043 {
1044   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1045   PetscInt    nt;
1046   VecScatter  Mvctx = a->Mvctx;
1047 
1048   PetscFunctionBegin;
1049   PetscCall(VecGetLocalSize(xx, &nt));
1050   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1051   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1052   PetscUseTypeMethod(a->A, mult, xx, yy);
1053   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1054   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1059 {
1060   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1068 {
1069   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1070   VecScatter  Mvctx = a->Mvctx;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1074   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1075   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1076   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1077   PetscFunctionReturn(PETSC_SUCCESS);
1078 }
1079 
1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1081 {
1082   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1083 
1084   PetscFunctionBegin;
1085   /* do nondiagonal part */
1086   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1087   /* do local part */
1088   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1089   /* add partial results together */
1090   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1091   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1092   PetscFunctionReturn(PETSC_SUCCESS);
1093 }
1094 
1095 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
1096 {
1097   MPI_Comm    comm;
1098   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
1099   Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
1100   IS          Me, Notme;
1101   PetscInt    M, N, first, last, *notme, i;
1102   PetscBool   lf;
1103   PetscMPIInt size;
1104 
1105   PetscFunctionBegin;
1106   /* Easy test: symmetric diagonal block */
1107   PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
1108   PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
1109   if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
1110   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
1111   PetscCallMPI(MPI_Comm_size(comm, &size));
1112   if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);
1113 
1114   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1115   PetscCall(MatGetSize(Amat, &M, &N));
1116   PetscCall(MatGetOwnershipRange(Amat, &first, &last));
1117   PetscCall(PetscMalloc1(N - last + first, &notme));
1118   for (i = 0; i < first; i++) notme[i] = i;
1119   for (i = last; i < M; i++) notme[i - last + first] = i;
1120   PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
1121   PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
1122   PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
1123   Aoff = Aoffs[0];
1124   PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
1125   Boff = Boffs[0];
1126   PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
1127   PetscCall(MatDestroyMatrices(1, &Aoffs));
1128   PetscCall(MatDestroyMatrices(1, &Boffs));
1129   PetscCall(ISDestroy(&Me));
1130   PetscCall(ISDestroy(&Notme));
1131   PetscCall(PetscFree(notme));
1132   PetscFunctionReturn(PETSC_SUCCESS);
1133 }
1134 
1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1136 {
1137   PetscFunctionBegin;
1138   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1139   PetscFunctionReturn(PETSC_SUCCESS);
1140 }
1141 
1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1143 {
1144   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1145 
1146   PetscFunctionBegin;
1147   /* do nondiagonal part */
1148   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1149   /* do local part */
1150   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1151   /* add partial results together */
1152   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1153   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1154   PetscFunctionReturn(PETSC_SUCCESS);
1155 }
1156 
1157 /*
1158   This only works correctly for square matrices where the subblock A->A is the
1159    diagonal block
1160 */
1161 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1162 {
1163   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1164 
1165   PetscFunctionBegin;
1166   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1167   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1168   PetscCall(MatGetDiagonal(a->A, v));
1169   PetscFunctionReturn(PETSC_SUCCESS);
1170 }
1171 
1172 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1173 {
1174   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1175 
1176   PetscFunctionBegin;
1177   PetscCall(MatScale(a->A, aa));
1178   PetscCall(MatScale(a->B, aa));
1179   PetscFunctionReturn(PETSC_SUCCESS);
1180 }
1181 
1182 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1183 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1184 {
1185   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(PetscSFDestroy(&aij->coo_sf));
1189   PetscCall(PetscFree(aij->Aperm1));
1190   PetscCall(PetscFree(aij->Bperm1));
1191   PetscCall(PetscFree(aij->Ajmap1));
1192   PetscCall(PetscFree(aij->Bjmap1));
1193 
1194   PetscCall(PetscFree(aij->Aimap2));
1195   PetscCall(PetscFree(aij->Bimap2));
1196   PetscCall(PetscFree(aij->Aperm2));
1197   PetscCall(PetscFree(aij->Bperm2));
1198   PetscCall(PetscFree(aij->Ajmap2));
1199   PetscCall(PetscFree(aij->Bjmap2));
1200 
1201   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1202   PetscCall(PetscFree(aij->Cperm1));
1203   PetscFunctionReturn(PETSC_SUCCESS);
1204 }
1205 
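/* Writes the matrix in the PETSc binary format: a 4-entry header (MAT_FILE_CLASSID, M, N, total number of
   nonzeros), followed by all row lengths, then all global column indices, then all nonzero values, each
   written collectively across the communicator.  Column indices within a row are emitted in ascending
   global order by interleaving the off-diagonal (B) and diagonal (A) blocks around the owned column range. */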
1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1207 {
1208   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1209   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1210   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1211   const PetscInt    *garray = aij->garray;
1212   const PetscScalar *aa, *ba;
1213   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1214   PetscInt          *rowlens;
1215   PetscInt          *colidxs;
1216   PetscScalar       *matvals;
1217 
1218   PetscFunctionBegin;
1219   PetscCall(PetscViewerSetUp(viewer));
1220 
1221   M  = mat->rmap->N;
1222   N  = mat->cmap->N;
1223   m  = mat->rmap->n;
1224   rs = mat->rmap->rstart;
1225   cs = mat->cmap->rstart;
1226   nz = A->nz + B->nz;
1227 
1228   /* write matrix header */
1229   header[0] = MAT_FILE_CLASSID;
1230   header[1] = M;
1231   header[2] = N;
1232   header[3] = nz;
1233   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1234   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1235 
1236   /* fill in and store row lengths  */
1237   PetscCall(PetscMalloc1(m, &rowlens));
1238   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1239   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1240   PetscCall(PetscFree(rowlens));
1241 
1242   /* fill in and store column indices */
1243   PetscCall(PetscMalloc1(nz, &colidxs));
1244   for (cnt = 0, i = 0; i < m; i++) {
1245     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1246       if (garray[B->j[jb]] > cs) break;
1247       colidxs[cnt++] = garray[B->j[jb]];
1248     }
1249     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1250     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1251   }
1252   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1253   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1254   PetscCall(PetscFree(colidxs));
1255 
1256   /* fill in and store nonzero values */
1257   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1258   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1259   PetscCall(PetscMalloc1(nz, &matvals));
1260   for (cnt = 0, i = 0; i < m; i++) {
1261     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1262       if (garray[B->j[jb]] > cs) break;
1263       matvals[cnt++] = ba[jb];
1264     }
1265     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1266     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1267   }
1268   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1269   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1270   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1271   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1272   PetscCall(PetscFree(matvals));
1273 
1274   /* write block size option to the viewer's .info file */
1275   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1276   PetscFunctionReturn(PETSC_SUCCESS);
1277 }
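
/*
  Illustrative sketch (not part of the source): the stream written above is the standard PETSc binary
  matrix format - a four-entry header {MAT_FILE_CLASSID, M, N, global nz}, then the per-row nonzero
  counts, the global column indices, and finally the numerical values, each gathered in global row
  order. Such a file can be written and read back roughly as follows (the file name is a placeholder):

    PetscViewer viewer;
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_WRITE, &viewer));
    PetscCall(MatView(A, viewer));   // ends up in MatView_MPIAIJ_Binary() when run on more than one rank
    PetscCall(PetscViewerDestroy(&viewer));

    Mat B;
    PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
    PetscCall(MatLoad(B, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
*/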
1278 
1279 #include <petscdraw.h>
1280 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
1281 {
1282   Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
1283   PetscMPIInt       rank = aij->rank, size = aij->size;
1284   PetscBool         isdraw, iascii, isbinary;
1285   PetscViewer       sviewer;
1286   PetscViewerFormat format;
1287 
1288   PetscFunctionBegin;
1289   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1290   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1291   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1292   if (iascii) {
1293     PetscCall(PetscViewerGetFormat(viewer, &format));
1294     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1295       PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
1296       PetscCall(PetscMalloc1(size, &nz));
1297       PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
1298       for (i = 0; i < (PetscInt)size; i++) {
1299         nmax = PetscMax(nmax, nz[i]);
1300         nmin = PetscMin(nmin, nz[i]);
1301         navg += nz[i];
1302       }
1303       PetscCall(PetscFree(nz));
1304       navg = navg / size;
1305       PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
1306       PetscFunctionReturn(PETSC_SUCCESS);
1307     }
1308     PetscCall(PetscViewerGetFormat(viewer, &format));
1309     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1310       MatInfo   info;
1311       PetscInt *inodes = NULL;
1312 
1313       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
1314       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
1315       PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
1316       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1317       if (!inodes) {
1318         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1319                                                      (double)info.memory));
1320       } else {
1321         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
1322                                                      (double)info.memory));
1323       }
1324       PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
1325       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1326       PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
1327       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
1328       PetscCall(PetscViewerFlush(viewer));
1329       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1330       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
1331       PetscCall(VecScatterView(aij->Mvctx, viewer));
1332       PetscFunctionReturn(PETSC_SUCCESS);
1333     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1334       PetscInt inodecount, inodelimit, *inodes;
1335       PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
1336       if (inodes) {
1337         PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
1338       } else {
1339         PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
1340       }
1341       PetscFunctionReturn(PETSC_SUCCESS);
1342     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1343       PetscFunctionReturn(PETSC_SUCCESS);
1344     }
1345   } else if (isbinary) {
1346     if (size == 1) {
1347       PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1348       PetscCall(MatView(aij->A, viewer));
1349     } else {
1350       PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
1351     }
1352     PetscFunctionReturn(PETSC_SUCCESS);
1353   } else if (iascii && size == 1) {
1354     PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
1355     PetscCall(MatView(aij->A, viewer));
1356     PetscFunctionReturn(PETSC_SUCCESS);
1357   } else if (isdraw) {
1358     PetscDraw draw;
1359     PetscBool isnull;
1360     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
1361     PetscCall(PetscDrawIsNull(draw, &isnull));
1362     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
1363   }
1364 
1365   { /* assemble the entire matrix onto first processor */
1366     Mat A = NULL, Av;
1367     IS  isrow, iscol;
1368 
1369     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
1370     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
1371     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
1372     PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
1373     /*  The commented code uses MatCreateSubMatrices instead */
1374     /*
1375     Mat *AA, A = NULL, Av;
1376     IS  isrow,iscol;
1377 
1378     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
1379     PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
1380     PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
1381     if (rank == 0) {
1382        PetscCall(PetscObjectReference((PetscObject)AA[0]));
1383        A    = AA[0];
1384        Av   = AA[0];
1385     }
1386     PetscCall(MatDestroySubMatrices(1,&AA));
1387 */
1388     PetscCall(ISDestroy(&iscol));
1389     PetscCall(ISDestroy(&isrow));
1390     /*
1391        Everyone has to call to draw the matrix since the graphics waits are
1392        synchronized across all processors that share the PetscDraw object
1393     */
1394     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1395     if (rank == 0) {
1396       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
1397       PetscCall(MatView_SeqAIJ(Av, sviewer));
1398     }
1399     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1400     PetscCall(PetscViewerFlush(viewer));
1401     PetscCall(MatDestroy(&A));
1402   }
1403   PetscFunctionReturn(PETSC_SUCCESS);
1404 }
1405 
1406 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1407 {
1408   PetscBool iascii, isdraw, issocket, isbinary;
1409 
1410   PetscFunctionBegin;
1411   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1412   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1413   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1414   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1415   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1416   PetscFunctionReturn(PETSC_SUCCESS);
1417 }
1418 
1419 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
1420 {
1421   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
1422   Vec         bb1 = NULL;
1423   PetscBool   hasop;
1424 
1425   PetscFunctionBegin;
1426   if (flag == SOR_APPLY_UPPER) {
1427     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1428     PetscFunctionReturn(PETSC_SUCCESS);
1429   }
1430 
1431   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
1432 
1433   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1434     if (flag & SOR_ZERO_INITIAL_GUESS) {
1435       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1436       its--;
1437     }
1438 
1439     while (its--) {
1440       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1441       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1442 
1443       /* update rhs: bb1 = bb - B*x */
1444       PetscCall(VecScale(mat->lvec, -1.0));
1445       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1446 
1447       /* local sweep */
1448       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
1449     }
1450   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1451     if (flag & SOR_ZERO_INITIAL_GUESS) {
1452       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1453       its--;
1454     }
1455     while (its--) {
1456       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1457       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1458 
1459       /* update rhs: bb1 = bb - B*x */
1460       PetscCall(VecScale(mat->lvec, -1.0));
1461       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1462 
1463       /* local sweep */
1464       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
1465     }
1466   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1467     if (flag & SOR_ZERO_INITIAL_GUESS) {
1468       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
1469       its--;
1470     }
1471     while (its--) {
1472       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1473       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       PetscCall(VecScale(mat->lvec, -1.0));
1477       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
1478 
1479       /* local sweep */
1480       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
1481     }
1482   } else if (flag & SOR_EISENSTAT) {
1483     Vec xx1;
1484 
1485     PetscCall(VecDuplicate(bb, &xx1));
1486     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
1487 
1488     PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1489     PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
1490     if (!mat->diag) {
1491       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
1492       PetscCall(MatGetDiagonal(matin, mat->diag));
1493     }
1494     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
1495     if (hasop) {
1496       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
1497     } else {
1498       PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
1499     }
1500     PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));
1501 
1502     PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));
1503 
1504     /* local sweep */
1505     PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
1506     PetscCall(VecAXPY(xx, 1.0, xx1));
1507     PetscCall(VecDestroy(&xx1));
1508   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");
1509 
1510   PetscCall(VecDestroy(&bb1));
1511 
1512   matin->factorerrortype = mat->A->factorerrortype;
1513   PetscFunctionReturn(PETSC_SUCCESS);
1514 }
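
/*
  Summary (illustrative): for the SOR_LOCAL_* variants above, each outer iteration performs the
  block update

      x_local <- SOR(A_local, omega, b_local - B_local * x_ghost)

  where A_local is the diagonal block, B_local the off-diagonal block, and x_ghost the ghosted copy
  of x gathered through mat->Mvctx. The VecScale(mat->lvec, -1.0) followed by the multadd computes
  bb1 = bb + B*(-x_ghost) = bb - B*x_ghost.
*/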
1515 
1516 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
1517 {
1518   Mat             aA, aB, Aperm;
1519   const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
1520   PetscScalar    *aa, *ba;
1521   PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
1522   PetscSF         rowsf, sf;
1523   IS              parcolp = NULL;
1524   PetscBool       done;
1525 
1526   PetscFunctionBegin;
1527   PetscCall(MatGetLocalSize(A, &m, &n));
1528   PetscCall(ISGetIndices(rowp, &rwant));
1529   PetscCall(ISGetIndices(colp, &cwant));
1530   PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));
1531 
1532   /* Invert row permutation to find out where my rows should go */
1533   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
1534   PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
1535   PetscCall(PetscSFSetFromOptions(rowsf));
1536   for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
1537   PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1538   PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
1539 
1540   /* Invert column permutation to find out where my columns should go */
1541   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1542   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
1543   PetscCall(PetscSFSetFromOptions(sf));
1544   for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
1545   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1546   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
1547   PetscCall(PetscSFDestroy(&sf));
1548 
1549   PetscCall(ISRestoreIndices(rowp, &rwant));
1550   PetscCall(ISRestoreIndices(colp, &cwant));
1551   PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));
1552 
1553   /* Find out where my gcols should go */
1554   PetscCall(MatGetSize(aB, NULL, &ng));
1555   PetscCall(PetscMalloc1(ng, &gcdest));
1556   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1557   PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
1558   PetscCall(PetscSFSetFromOptions(sf));
1559   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1560   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
1561   PetscCall(PetscSFDestroy(&sf));
1562 
1563   PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
1564   PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1565   PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1566   for (i = 0; i < m; i++) {
1567     PetscInt    row = rdest[i];
1568     PetscMPIInt rowner;
1569     PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
1570     for (j = ai[i]; j < ai[i + 1]; j++) {
1571       PetscInt    col = cdest[aj[j]];
1572       PetscMPIInt cowner;
1573       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
1574       if (rowner == cowner) dnnz[i]++;
1575       else onnz[i]++;
1576     }
1577     for (j = bi[i]; j < bi[i + 1]; j++) {
1578       PetscInt    col = gcdest[bj[j]];
1579       PetscMPIInt cowner;
1580       PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
1581       if (rowner == cowner) dnnz[i]++;
1582       else onnz[i]++;
1583     }
1584   }
1585   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1586   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
1587   PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1588   PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
1589   PetscCall(PetscSFDestroy(&rowsf));
1590 
1591   PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
1592   PetscCall(MatSeqAIJGetArray(aA, &aa));
1593   PetscCall(MatSeqAIJGetArray(aB, &ba));
1594   for (i = 0; i < m; i++) {
1595     PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
1596     PetscInt  j0, rowlen;
1597     rowlen = ai[i + 1] - ai[i];
1598     for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen may exceed the number of local rows m, so insert the values in batches of at most m */
1599       for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
1600       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
1601     }
1602     rowlen = bi[i + 1] - bi[i];
1603     for (j0 = j = 0; j < rowlen; j0 = j) {
1604       for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
1605       PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
1606     }
1607   }
1608   PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
1609   PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
1610   PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
1611   PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
1612   PetscCall(MatSeqAIJRestoreArray(aA, &aa));
1613   PetscCall(MatSeqAIJRestoreArray(aB, &ba));
1614   PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
1615   PetscCall(PetscFree3(work, rdest, cdest));
1616   PetscCall(PetscFree(gcdest));
1617   if (parcolp) PetscCall(ISDestroy(&colp));
1618   *B = Aperm;
1619   PetscFunctionReturn(PETSC_SUCCESS);
1620 }
1621 
1622 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1623 {
1624   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1625 
1626   PetscFunctionBegin;
1627   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1628   if (ghosts) *ghosts = aij->garray;
1629   PetscFunctionReturn(PETSC_SUCCESS);
1630 }
1631 
1632 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1633 {
1634   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1635   Mat            A = mat->A, B = mat->B;
1636   PetscLogDouble isend[5], irecv[5];
1637 
1638   PetscFunctionBegin;
1639   info->block_size = 1.0;
1640   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1641 
1642   isend[0] = info->nz_used;
1643   isend[1] = info->nz_allocated;
1644   isend[2] = info->nz_unneeded;
1645   isend[3] = info->memory;
1646   isend[4] = info->mallocs;
1647 
1648   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1649 
1650   isend[0] += info->nz_used;
1651   isend[1] += info->nz_allocated;
1652   isend[2] += info->nz_unneeded;
1653   isend[3] += info->memory;
1654   isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   }
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(PETSC_SUCCESS);
1682 }
1683 
1684 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
1685 {
1686   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1687 
1688   PetscFunctionBegin;
1689   switch (op) {
1690   case MAT_NEW_NONZERO_LOCATIONS:
1691   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1692   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1693   case MAT_KEEP_NONZERO_PATTERN:
1694   case MAT_NEW_NONZERO_LOCATION_ERR:
1695   case MAT_USE_INODES:
1696   case MAT_IGNORE_ZERO_ENTRIES:
1697   case MAT_FORM_EXPLICIT_TRANSPOSE:
1698     MatCheckPreallocated(A, 1);
1699     PetscCall(MatSetOption(a->A, op, flg));
1700     PetscCall(MatSetOption(a->B, op, flg));
1701     break;
1702   case MAT_ROW_ORIENTED:
1703     MatCheckPreallocated(A, 1);
1704     a->roworiented = flg;
1705 
1706     PetscCall(MatSetOption(a->A, op, flg));
1707     PetscCall(MatSetOption(a->B, op, flg));
1708     break;
1709   case MAT_FORCE_DIAGONAL_ENTRIES:
1710   case MAT_SORTED_FULL:
1711     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1712     break;
1713   case MAT_IGNORE_OFF_PROC_ENTRIES:
1714     a->donotstash = flg;
1715     break;
1716   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1717   case MAT_SPD:
1718   case MAT_SYMMETRIC:
1719   case MAT_STRUCTURALLY_SYMMETRIC:
1720   case MAT_HERMITIAN:
1721   case MAT_SYMMETRY_ETERNAL:
1722   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1723   case MAT_SPD_ETERNAL:
1724     /* if the diagonal matrix is square it inherits some of the properties above */
1725     break;
1726   case MAT_SUBMAT_SINGLEIS:
1727     A->submat_singleis = flg;
1728     break;
1729   case MAT_STRUCTURE_ONLY:
1730     /* The option is handled directly by MatSetOption() */
1731     break;
1732   default:
1733     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1734   }
1735   PetscFunctionReturn(PETSC_SUCCESS);
1736 }
1737 
1738 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1739 {
1740   Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
1741   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1742   PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
1743   PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
1744   PetscInt    *cmap, *idx_p;
1745 
1746   PetscFunctionBegin;
1747   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1748   mat->getrowactive = PETSC_TRUE;
1749 
1750   if (!mat->rowvalues && (idx || v)) {
1751     /*
1752         allocate enough space to hold information from the longest row.
1753     */
1754     Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
1755     PetscInt    max = 1, tmp;
1756     for (i = 0; i < matin->rmap->n; i++) {
1757       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
1758       if (max < tmp) max = tmp;
1759     }
1760     PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
1761   }
1762 
1763   PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
1764   lrow = row - rstart;
1765 
1766   pvA = &vworkA;
1767   pcA = &cworkA;
1768   pvB = &vworkB;
1769   pcB = &cworkB;
1770   if (!v) {
1771     pvA = NULL;
1772     pvB = NULL;
1773   }
1774   if (!idx) {
1775     pcA = NULL;
1776     if (!v) pcB = NULL;
1777   }
1778   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1779   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1780   nztot = nzA + nzB;
1781 
1782   cmap = mat->garray;
1783   if (v || idx) {
1784     if (nztot) {
1785       /* Sort by increasing column numbers, assuming A and B already sorted */
1786       PetscInt imark = -1;
1787       if (v) {
1788         *v = v_p = mat->rowvalues;
1789         for (i = 0; i < nzB; i++) {
1790           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1791           else break;
1792         }
1793         imark = i;
1794         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1795         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1796       }
1797       if (idx) {
1798         *idx = idx_p = mat->rowindices;
1799         if (imark > -1) {
1800           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
1801         } else {
1802           for (i = 0; i < nzB; i++) {
1803             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1804             else break;
1805           }
1806           imark = i;
1807         }
1808         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
1809         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
1810       }
1811     } else {
1812       if (idx) *idx = NULL;
1813       if (v) *v = NULL;
1814     }
1815   }
1816   *nz = nztot;
1817   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1818   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1819   PetscFunctionReturn(PETSC_SUCCESS);
1820 }
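
/*
  Worked example (illustrative values): with cstart = 4, three local columns, and garray = {2, 9},
  a local row with A columns {0, 1} and B columns {0, 1} (local numbering) is returned in global
  column order as {2, 4, 5, 9}: the B entries mapping below cstart come first (imark = 1), then the
  A entries shifted by cstart, then the remaining B entries.
*/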
1821 
1822 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1823 {
1824   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1825 
1826   PetscFunctionBegin;
1827   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1828   aij->getrowactive = PETSC_FALSE;
1829   PetscFunctionReturn(PETSC_SUCCESS);
1830 }
1831 
1832 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
1833 {
1834   Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
1835   Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
1836   PetscInt         i, j, cstart = mat->cmap->rstart;
1837   PetscReal        sum = 0.0;
1838   const MatScalar *v, *amata, *bmata;
1839 
1840   PetscFunctionBegin;
1841   if (aij->size == 1) {
1842     PetscCall(MatNorm(aij->A, type, norm));
1843   } else {
1844     PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
1845     PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
1846     if (type == NORM_FROBENIUS) {
1847       v = amata;
1848       for (i = 0; i < amat->nz; i++) {
1849         sum += PetscRealPart(PetscConj(*v) * (*v));
1850         v++;
1851       }
1852       v = bmata;
1853       for (i = 0; i < bmat->nz; i++) {
1854         sum += PetscRealPart(PetscConj(*v) * (*v));
1855         v++;
1856       }
1857       PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1858       *norm = PetscSqrtReal(*norm);
1859       PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
1860     } else if (type == NORM_1) { /* max column norm */
1861       PetscReal *tmp, *tmp2;
1862       PetscInt  *jj, *garray = aij->garray;
1863       PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
1864       PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
1865       *norm = 0.0;
1866       v     = amata;
1867       jj    = amat->j;
1868       for (j = 0; j < amat->nz; j++) {
1869         tmp[cstart + *jj++] += PetscAbsScalar(*v);
1870         v++;
1871       }
1872       v  = bmata;
1873       jj = bmat->j;
1874       for (j = 0; j < bmat->nz; j++) {
1875         tmp[garray[*jj++]] += PetscAbsScalar(*v);
1876         v++;
1877       }
1878       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
1879       for (j = 0; j < mat->cmap->N; j++) {
1880         if (tmp2[j] > *norm) *norm = tmp2[j];
1881       }
1882       PetscCall(PetscFree(tmp));
1883       PetscCall(PetscFree(tmp2));
1884       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1885     } else if (type == NORM_INFINITY) { /* max row norm */
1886       PetscReal ntemp = 0.0;
1887       for (j = 0; j < aij->A->rmap->n; j++) {
1888         v   = amata + amat->i[j];
1889         sum = 0.0;
1890         for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
1891           sum += PetscAbsScalar(*v);
1892           v++;
1893         }
1894         v = bmata + bmat->i[j];
1895         for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
1896           sum += PetscAbsScalar(*v);
1897           v++;
1898         }
1899         if (sum > ntemp) ntemp = sum;
1900       }
1901       PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
1902       PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
1903     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
1904     PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
1905     PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
1906   }
1907   PetscFunctionReturn(PETSC_SUCCESS);
1908 }
1909 
1910 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
1911 {
1912   Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
1913   Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
1914   PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
1915   const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
1916   Mat              B, A_diag, *B_diag;
1917   const MatScalar *pbv, *bv;
1918 
1919   PetscFunctionBegin;
1920   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1921   ma = A->rmap->n;
1922   na = A->cmap->n;
1923   mb = a->B->rmap->n;
1924   nb = a->B->cmap->n;
1925   ai = Aloc->i;
1926   aj = Aloc->j;
1927   bi = Bloc->i;
1928   bj = Bloc->j;
1929   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1930     PetscInt            *d_nnz, *g_nnz, *o_nnz;
1931     PetscSFNode         *oloc;
1932     PETSC_UNUSED PetscSF sf;
1933 
1934     PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
1935     /* compute d_nnz for preallocation */
1936     PetscCall(PetscArrayzero(d_nnz, na));
1937     for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
1938     /* compute local off-diagonal contributions */
1939     PetscCall(PetscArrayzero(g_nnz, nb));
1940     for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
1941     /* map those to global */
1942     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1943     PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
1944     PetscCall(PetscSFSetFromOptions(sf));
1945     PetscCall(PetscArrayzero(o_nnz, na));
1946     PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1947     PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
1948     PetscCall(PetscSFDestroy(&sf));
1949 
1950     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1951     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
1952     PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
1953     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
1954     PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
1955     PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
1956   } else {
1957     B = *matout;
1958     PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
1959   }
1960 
1961   b           = (Mat_MPIAIJ *)B->data;
1962   A_diag      = a->A;
1963   B_diag      = &b->A;
1964   sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
1965   A_diag_ncol = A_diag->cmap->N;
1966   B_diag_ilen = sub_B_diag->ilen;
1967   B_diag_i    = sub_B_diag->i;
1968 
1969   /* Set ilen for diagonal of B */
1970   for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];
1971 
1972   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1973      very quickly (i.e., without using MatSetValues()), because all writes are local. */
1974   PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
1975   PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));
1976 
1977   /* copy over the B part */
1978   PetscCall(PetscMalloc1(bi[mb], &cols));
1979   PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
1980   pbv = bv;
1981   row = A->rmap->rstart;
1982   for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
1983   cols_tmp = cols;
1984   for (i = 0; i < mb; i++) {
1985     ncol = bi[i + 1] - bi[i];
1986     PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
1987     row++;
1988     pbv += ncol;
1989     cols_tmp += ncol;
1990   }
1991   PetscCall(PetscFree(cols));
1992   PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));
1993 
1994   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1995   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
1996   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1997     *matout = B;
1998   } else {
1999     PetscCall(MatHeaderMerge(A, &B));
2000   }
2001   PetscFunctionReturn(PETSC_SUCCESS);
2002 }
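
/*
  Note (illustrative): in the transpose the diagonal block A_d^T keeps the same owner, so it is
  produced locally with MatTranspose() on the sequential block, while every entry of the
  off-diagonal block B becomes an entry of a row owned by whichever rank owns the corresponding
  global column of A; those entries therefore go through MatSetValues() and the assembly above.
*/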
2003 
2004 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
2005 {
2006   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2007   Mat         a = aij->A, b = aij->B;
2008   PetscInt    s1, s2, s3;
2009 
2010   PetscFunctionBegin;
2011   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2012   if (rr) {
2013     PetscCall(VecGetLocalSize(rr, &s1));
2014     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2015     /* Overlap communication with computation. */
2016     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2017   }
2018   if (ll) {
2019     PetscCall(VecGetLocalSize(ll, &s1));
2020     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2021     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2022   }
2023   /* scale the diagonal block */
2024   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2025 
2026   if (rr) {
2027     /* Do a scatter end and then right scale the off-diagonal block */
2028     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2029     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2030   }
2031   PetscFunctionReturn(PETSC_SUCCESS);
2032 }
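
/*
  Note (illustrative): MatDiagonalScale() computes mat <- diag(ll) * mat * diag(rr). In the
  [A | B] splitting of a MATMPIAIJ matrix this becomes diag(ll) * A * diag(rr_owned) for the
  diagonal block and diag(ll) * B * diag(rr_ghost) for the off-diagonal block, where rr_ghost is
  the ghosted copy of rr gathered into aij->lvec; the scatter is started before the diagonal-block
  scaling so that the communication overlaps the local computation.
*/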
2033 
2034 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2035 {
2036   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2037 
2038   PetscFunctionBegin;
2039   PetscCall(MatSetUnfactored(a->A));
2040   PetscFunctionReturn(PETSC_SUCCESS);
2041 }
2042 
2043 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2044 {
2045   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2046   Mat         a, b, c, d;
2047   PetscBool   flg;
2048 
2049   PetscFunctionBegin;
2050   a = matA->A;
2051   b = matA->B;
2052   c = matB->A;
2053   d = matB->B;
2054 
2055   PetscCall(MatEqual(a, c, &flg));
2056   if (flg) PetscCall(MatEqual(b, d, &flg));
2057   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2058   PetscFunctionReturn(PETSC_SUCCESS);
2059 }
2060 
2061 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2062 {
2063   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2064   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2065 
2066   PetscFunctionBegin;
2067   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2068   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2069     /* because of the column compression in the off-processor part of the matrix a->B,
2070        the number of columns in a->B and b->B may be different, hence we cannot call
2071        MatCopy() directly on the two parts. If need be, a copy more efficient than
2072        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2073        then copying the submatrices */
2074     PetscCall(MatCopy_Basic(A, B, str));
2075   } else {
2076     PetscCall(MatCopy(a->A, b->A, str));
2077     PetscCall(MatCopy(a->B, b->B, str));
2078   }
2079   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2080   PetscFunctionReturn(PETSC_SUCCESS);
2081 }
2082 
2083 /*
2084    Computes the number of nonzeros per row needed for preallocation when X and Y
2085    have different nonzero structure.
2086 */
2087 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2088 {
2089   PetscInt i, j, k, nzx, nzy;
2090 
2091   PetscFunctionBegin;
2092   /* Set the number of nonzeros in the new matrix */
2093   for (i = 0; i < m; i++) {
2094     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2095     nzx    = xi[i + 1] - xi[i];
2096     nzy    = yi[i + 1] - yi[i];
2097     nnz[i] = 0;
2098     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2099       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2100       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2101       nnz[i]++;
2102     }
2103     for (; k < nzy; k++) nnz[i]++;
2104   }
2105   PetscFunctionReturn(PETSC_SUCCESS);
2106 }
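
/*
  Worked example (illustrative values): if in some row X has global columns {1, 5} and Y has
  global columns {2, 5, 7}, the merge above counts the union {1, 2, 5, 7}: column 1 from X,
  column 2 while catching up in Y, column 5 once (the duplicate is skipped), and the trailing
  column 7 from Y, giving nnz = 4 for that row.
*/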
2107 
2108 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2109 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2110 {
2111   PetscInt    m = Y->rmap->N;
2112   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2113   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2114 
2115   PetscFunctionBegin;
2116   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2117   PetscFunctionReturn(PETSC_SUCCESS);
2118 }
2119 
2120 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2121 {
2122   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;
2123 
2124   PetscFunctionBegin;
2125   if (str == SAME_NONZERO_PATTERN) {
2126     PetscCall(MatAXPY(yy->A, a, xx->A, str));
2127     PetscCall(MatAXPY(yy->B, a, xx->B, str));
2128   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2129     PetscCall(MatAXPY_Basic(Y, a, X, str));
2130   } else {
2131     Mat       B;
2132     PetscInt *nnz_d, *nnz_o;
2133 
2134     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
2135     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
2136     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
2137     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
2138     PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
2139     PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
2140     PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
2141     PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
2142     PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
2143     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
2144     PetscCall(MatHeaderMerge(Y, &B));
2145     PetscCall(PetscFree(nnz_d));
2146     PetscCall(PetscFree(nnz_o));
2147   }
2148   PetscFunctionReturn(PETSC_SUCCESS);
2149 }
2150 
2151 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2152 
2153 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2154 {
2155   PetscFunctionBegin;
2156   if (PetscDefined(USE_COMPLEX)) {
2157     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2158 
2159     PetscCall(MatConjugate_SeqAIJ(aij->A));
2160     PetscCall(MatConjugate_SeqAIJ(aij->B));
2161   }
2162   PetscFunctionReturn(PETSC_SUCCESS);
2163 }
2164 
2165 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2166 {
2167   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2168 
2169   PetscFunctionBegin;
2170   PetscCall(MatRealPart(a->A));
2171   PetscCall(MatRealPart(a->B));
2172   PetscFunctionReturn(PETSC_SUCCESS);
2173 }
2174 
2175 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2176 {
2177   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2178 
2179   PetscFunctionBegin;
2180   PetscCall(MatImaginaryPart(a->A));
2181   PetscCall(MatImaginaryPart(a->B));
2182   PetscFunctionReturn(PETSC_SUCCESS);
2183 }
2184 
2185 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2186 {
2187   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2188   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2189   PetscScalar       *va, *vv;
2190   Vec                vB, vA;
2191   const PetscScalar *vb;
2192 
2193   PetscFunctionBegin;
2194   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2195   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2196 
2197   PetscCall(VecGetArrayWrite(vA, &va));
2198   if (idx) {
2199     for (i = 0; i < m; i++) {
2200       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2201     }
2202   }
2203 
2204   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2205   PetscCall(PetscMalloc1(m, &idxb));
2206   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2207 
2208   PetscCall(VecGetArrayWrite(v, &vv));
2209   PetscCall(VecGetArrayRead(vB, &vb));
2210   for (i = 0; i < m; i++) {
2211     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2212       vv[i] = vb[i];
2213       if (idx) idx[i] = a->garray[idxb[i]];
2214     } else {
2215       vv[i] = va[i];
2216       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2217     }
2218   }
2219   PetscCall(VecRestoreArrayWrite(v, &vv));
2220   PetscCall(VecRestoreArrayWrite(vA, &va));
2221   PetscCall(VecRestoreArrayRead(vB, &vb));
2222   PetscCall(PetscFree(idxb));
2223   PetscCall(VecDestroy(&vA));
2224   PetscCall(VecDestroy(&vB));
2225   PetscFunctionReturn(PETSC_SUCCESS);
2226 }
2227 
2228 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2229 {
2230   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2231   PetscInt           m = A->rmap->n, n = A->cmap->n;
2232   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2233   PetscInt          *cmap = mat->garray;
2234   PetscInt          *diagIdx, *offdiagIdx;
2235   Vec                diagV, offdiagV;
2236   PetscScalar       *a, *diagA, *offdiagA;
2237   const PetscScalar *ba, *bav;
2238   PetscInt           r, j, col, ncols, *bi, *bj;
2239   Mat                B = mat->B;
2240   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2241 
2242   PetscFunctionBegin;
2243   /* When one process holds the entire matrix A and the other processes have no entries */
2244   if (A->cmap->N == n) {
2245     PetscCall(VecGetArrayWrite(v, &diagA));
2246     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2247     PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
2248     PetscCall(VecDestroy(&diagV));
2249     PetscCall(VecRestoreArrayWrite(v, &diagA));
2250     PetscFunctionReturn(PETSC_SUCCESS);
2251   } else if (n == 0) {
2252     if (m) {
2253       PetscCall(VecGetArrayWrite(v, &a));
2254       for (r = 0; r < m; r++) {
2255         a[r] = 0.0;
2256         if (idx) idx[r] = -1;
2257       }
2258       PetscCall(VecRestoreArrayWrite(v, &a));
2259     }
2260     PetscFunctionReturn(PETSC_SUCCESS);
2261   }
2262 
2263   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2264   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2265   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2266   PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));
2267 
2268   /* Get offdiagIdx[] for implicit 0.0 */
2269   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2270   ba = bav;
2271   bi = b->i;
2272   bj = b->j;
2273   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2274   for (r = 0; r < m; r++) {
2275     ncols = bi[r + 1] - bi[r];
2276     if (ncols == A->cmap->N - n) { /* Brow is dense */
2277       offdiagA[r]   = *ba;
2278       offdiagIdx[r] = cmap[0];
2279     } else { /* Brow is sparse so we already KNOW the off-diagonal minimum in absolute value is 0.0 (there is an implicit zero) */
2280       offdiagA[r] = 0.0;
2281 
2282       /* Find first hole in the cmap */
2283       for (j = 0; j < ncols; j++) {
2284         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2285         if (col > j && j < cstart) {
2286           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2287           break;
2288         } else if (col > j + n && j >= cstart) {
2289           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2290           break;
2291         }
2292       }
2293       if (j == ncols && ncols < A->cmap->N - n) {
2294         /* a hole is outside compressed Bcols */
2295         if (ncols == 0) {
2296           if (cstart) {
2297             offdiagIdx[r] = 0;
2298           } else offdiagIdx[r] = cend;
2299         } else { /* ncols > 0 */
2300           offdiagIdx[r] = cmap[ncols - 1] + 1;
2301           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2302         }
2303       }
2304     }
2305 
2306     for (j = 0; j < ncols; j++) {
2307       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
2308         offdiagA[r]   = *ba;
2309         offdiagIdx[r] = cmap[*bj];
2310       }
2311       ba++;
2312       bj++;
2313     }
2314   }
2315 
2316   PetscCall(VecGetArrayWrite(v, &a));
2317   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2318   for (r = 0; r < m; ++r) {
2319     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2320       a[r] = diagA[r];
2321       if (idx) idx[r] = cstart + diagIdx[r];
2322     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2323       a[r] = diagA[r];
2324       if (idx) {
2325         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2326           idx[r] = cstart + diagIdx[r];
2327         } else idx[r] = offdiagIdx[r];
2328       }
2329     } else {
2330       a[r] = offdiagA[r];
2331       if (idx) idx[r] = offdiagIdx[r];
2332     }
2333   }
2334   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2335   PetscCall(VecRestoreArrayWrite(v, &a));
2336   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2337   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2338   PetscCall(VecDestroy(&diagV));
2339   PetscCall(VecDestroy(&offdiagV));
2340   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2341   PetscFunctionReturn(PETSC_SUCCESS);
2342 }
2343 
2344 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2345 {
2346   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2347   PetscInt           m = A->rmap->n, n = A->cmap->n;
2348   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2349   PetscInt          *cmap = mat->garray;
2350   PetscInt          *diagIdx, *offdiagIdx;
2351   Vec                diagV, offdiagV;
2352   PetscScalar       *a, *diagA, *offdiagA;
2353   const PetscScalar *ba, *bav;
2354   PetscInt           r, j, col, ncols, *bi, *bj;
2355   Mat                B = mat->B;
2356   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2357 
2358   PetscFunctionBegin;
2359   /* When one process holds the entire matrix A and the other processes have no entries */
2360   if (A->cmap->N == n) {
2361     PetscCall(VecGetArrayWrite(v, &diagA));
2362     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2363     PetscCall(MatGetRowMin(mat->A, diagV, idx));
2364     PetscCall(VecDestroy(&diagV));
2365     PetscCall(VecRestoreArrayWrite(v, &diagA));
2366     PetscFunctionReturn(PETSC_SUCCESS);
2367   } else if (n == 0) {
2368     if (m) {
2369       PetscCall(VecGetArrayWrite(v, &a));
2370       for (r = 0; r < m; r++) {
2371         a[r] = PETSC_MAX_REAL;
2372         if (idx) idx[r] = -1;
2373       }
2374       PetscCall(VecRestoreArrayWrite(v, &a));
2375     }
2376     PetscFunctionReturn(PETSC_SUCCESS);
2377   }
2378 
2379   PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
2380   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2381   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2382   PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));
2383 
2384   /* Get offdiagIdx[] for implicit 0.0 */
2385   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2386   ba = bav;
2387   bi = b->i;
2388   bj = b->j;
2389   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2390   for (r = 0; r < m; r++) {
2391     ncols = bi[r + 1] - bi[r];
2392     if (ncols == A->cmap->N - n) { /* Brow is dense */
2393       offdiagA[r]   = *ba;
2394       offdiagIdx[r] = cmap[0];
2395     } else { /* Brow is sparse so we already KNOW the off-diagonal minimum is 0.0 or lower (there is an implicit zero) */
2396       offdiagA[r] = 0.0;
2397 
2398       /* Find first hole in the cmap */
2399       for (j = 0; j < ncols; j++) {
2400         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2401         if (col > j && j < cstart) {
2402           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2403           break;
2404         } else if (col > j + n && j >= cstart) {
2405           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2406           break;
2407         }
2408       }
2409       if (j == ncols && ncols < A->cmap->N - n) {
2410         /* a hole is outside compressed Bcols */
2411         if (ncols == 0) {
2412           if (cstart) {
2413             offdiagIdx[r] = 0;
2414           } else offdiagIdx[r] = cend;
2415         } else { /* ncols > 0 */
2416           offdiagIdx[r] = cmap[ncols - 1] + 1;
2417           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2418         }
2419       }
2420     }
2421 
2422     for (j = 0; j < ncols; j++) {
2423       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
2424         offdiagA[r]   = *ba;
2425         offdiagIdx[r] = cmap[*bj];
2426       }
2427       ba++;
2428       bj++;
2429     }
2430   }
2431 
2432   PetscCall(VecGetArrayWrite(v, &a));
2433   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2434   for (r = 0; r < m; ++r) {
2435     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2436       a[r] = diagA[r];
2437       if (idx) idx[r] = cstart + diagIdx[r];
2438     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2439       a[r] = diagA[r];
2440       if (idx) {
2441         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2442           idx[r] = cstart + diagIdx[r];
2443         } else idx[r] = offdiagIdx[r];
2444       }
2445     } else {
2446       a[r] = offdiagA[r];
2447       if (idx) idx[r] = offdiagIdx[r];
2448     }
2449   }
2450   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2451   PetscCall(VecRestoreArrayWrite(v, &a));
2452   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2453   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2454   PetscCall(VecDestroy(&diagV));
2455   PetscCall(VecDestroy(&offdiagV));
2456   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2457   PetscFunctionReturn(PETSC_SUCCESS);
2458 }
2459 
2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2461 {
2462   Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
2463   PetscInt           m = A->rmap->n, n = A->cmap->n;
2464   PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
2465   PetscInt          *cmap = mat->garray;
2466   PetscInt          *diagIdx, *offdiagIdx;
2467   Vec                diagV, offdiagV;
2468   PetscScalar       *a, *diagA, *offdiagA;
2469   const PetscScalar *ba, *bav;
2470   PetscInt           r, j, col, ncols, *bi, *bj;
2471   Mat                B = mat->B;
2472   Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;
2473 
2474   PetscFunctionBegin;
2475   /* When one process holds the entire matrix A and the other processes have no entries */
2476   if (A->cmap->N == n) {
2477     PetscCall(VecGetArrayWrite(v, &diagA));
2478     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
2479     PetscCall(MatGetRowMax(mat->A, diagV, idx));
2480     PetscCall(VecDestroy(&diagV));
2481     PetscCall(VecRestoreArrayWrite(v, &diagA));
2482     PetscFunctionReturn(PETSC_SUCCESS);
2483   } else if (n == 0) {
2484     if (m) {
2485       PetscCall(VecGetArrayWrite(v, &a));
2486       for (r = 0; r < m; r++) {
2487         a[r] = PETSC_MIN_REAL;
2488         if (idx) idx[r] = -1;
2489       }
2490       PetscCall(VecRestoreArrayWrite(v, &a));
2491     }
2492     PetscFunctionReturn(PETSC_SUCCESS);
2493   }
2494 
2495   PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
2496   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
2497   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
2498   PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));
2499 
2500   /* Get offdiagIdx[] for implicit 0.0 */
2501   PetscCall(MatSeqAIJGetArrayRead(B, &bav));
2502   ba = bav;
2503   bi = b->i;
2504   bj = b->j;
2505   PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
2506   for (r = 0; r < m; r++) {
2507     ncols = bi[r + 1] - bi[r];
2508     if (ncols == A->cmap->N - n) { /* Brow is dense */
2509       offdiagA[r]   = *ba;
2510       offdiagIdx[r] = cmap[0];
2511     } else { /* Brow is sparse so we already KNOW the off-diagonal maximum is 0.0 or higher (there is an implicit zero) */
2512       offdiagA[r] = 0.0;
2513 
2514       /* Find first hole in the cmap */
2515       for (j = 0; j < ncols; j++) {
2516         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2517         if (col > j && j < cstart) {
2518           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2519           break;
2520         } else if (col > j + n && j >= cstart) {
2521           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2522           break;
2523         }
2524       }
2525       if (j == ncols && ncols < A->cmap->N - n) {
2526         /* a hole is outside compressed Bcols */
2527         if (ncols == 0) {
2528           if (cstart) {
2529             offdiagIdx[r] = 0;
2530           } else offdiagIdx[r] = cend;
2531         } else { /* ncols > 0 */
2532           offdiagIdx[r] = cmap[ncols - 1] + 1;
2533           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2534         }
2535       }
2536     }
2537 
2538     for (j = 0; j < ncols; j++) {
2539       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
2540         offdiagA[r]   = *ba;
2541         offdiagIdx[r] = cmap[*bj];
2542       }
2543       ba++;
2544       bj++;
2545     }
2546   }
2547 
2548   PetscCall(VecGetArrayWrite(v, &a));
2549   PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
2550   for (r = 0; r < m; ++r) {
2551     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2552       a[r] = diagA[r];
2553       if (idx) idx[r] = cstart + diagIdx[r];
2554     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2555       a[r] = diagA[r];
2556       if (idx) {
2557         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2558           idx[r] = cstart + diagIdx[r];
2559         } else idx[r] = offdiagIdx[r];
2560       }
2561     } else {
2562       a[r] = offdiagA[r];
2563       if (idx) idx[r] = offdiagIdx[r];
2564     }
2565   }
2566   PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
2567   PetscCall(VecRestoreArrayWrite(v, &a));
2568   PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
2569   PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
2570   PetscCall(VecDestroy(&diagV));
2571   PetscCall(VecDestroy(&offdiagV));
2572   PetscCall(PetscFree2(diagIdx, offdiagIdx));
2573   PetscFunctionReturn(PETSC_SUCCESS);
2574 }
2575 
2576 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2577 {
2578   Mat *dummy;
2579 
2580   PetscFunctionBegin;
2581   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2582   *newmat = *dummy;
2583   PetscCall(PetscFree(dummy));
2584   PetscFunctionReturn(PETSC_SUCCESS);
2585 }
2586 
2587 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2588 {
2589   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2590 
2591   PetscFunctionBegin;
2592   PetscCall(MatInvertBlockDiagonal(a->A, values));
2593   A->factorerrortype = a->A->factorerrortype;
2594   PetscFunctionReturn(PETSC_SUCCESS);
2595 }
2596 
2597 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2598 {
2599   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2600 
2601   PetscFunctionBegin;
2602   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2603   PetscCall(MatSetRandom(aij->A, rctx));
2604   if (x->assembled) {
2605     PetscCall(MatSetRandom(aij->B, rctx));
2606   } else {
2607     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2608   }
2609   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2610   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2611   PetscFunctionReturn(PETSC_SUCCESS);
2612 }
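
/* Illustrative sketch (not part of the original source): filling a preallocated or assembled
   MATMPIAIJ with random entries from the caller's side, using only public calls. A is hypothetical.

     PetscRandom rctx;
     PetscCall(PetscRandomCreate(PetscObjectComm((PetscObject)A), &rctx));
     PetscCall(PetscRandomSetFromOptions(rctx));
     PetscCall(MatSetRandom(A, rctx)); // dispatches to MatSetRandom_MPIAIJ() for MATMPIAIJ
     PetscCall(PetscRandomDestroy(&rctx));
*/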
2613 
2614 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2615 {
2616   PetscFunctionBegin;
2617   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2618   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2619   PetscFunctionReturn(PETSC_SUCCESS);
2620 }
2621 
2622 /*@
2623    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2624 
2625    Not Collective
2626 
2627    Input Parameter:
2628 .    A - the matrix
2629 
2630    Output Parameter:
2631 .    nz - the number of nonzeros
2632 
2633  Level: advanced
2634 
2635 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`
2636 @*/
2637 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2638 {
2639   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2640   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2641 
2642   PetscFunctionBegin;
2643   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2644   PetscFunctionReturn(PETSC_SUCCESS);
2645 }
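
/* Illustrative sketch (not part of the original source): querying the number of locally stored
   nonzeros after assembly. The matrix A is hypothetical.

     PetscCount nz;
     PetscCall(MatMPIAIJGetNumberNonzeros(A, &nz));
     // nz = nonzeros of the local diagonal block plus the local off-diagonal block
*/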
2646 
2647 /*@
2648    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2649 
2650    Collective
2651 
2652    Input Parameters:
2653 +    A - the matrix
2654 -    sc - `PETSC_TRUE` indicates use of the scalable algorithm (the default is not to use it)
2655 
2656  Level: advanced
2657 
2658 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`
2659 @*/
2660 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2661 {
2662   PetscFunctionBegin;
2663   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2664   PetscFunctionReturn(PETSC_SUCCESS);
2665 }
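
/* Illustrative sketch (not part of the original source): selecting the scalable overlap algorithm
   before calling MatIncreaseOverlap(); the same switch is reachable through the
   -mat_increase_overlap_scalable option handled in MatSetFromOptions_MPIAIJ() below.
   The matrix A and index set is are hypothetical.

     PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, PETSC_TRUE));
     PetscCall(MatIncreaseOverlap(A, 1, &is, 2)); // grow the single index set is by an overlap of 2
*/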
2666 
2667 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2668 {
2669   PetscBool sc = PETSC_FALSE, flg;
2670 
2671   PetscFunctionBegin;
2672   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2673   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2674   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2675   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2676   PetscOptionsHeadEnd();
2677   PetscFunctionReturn(PETSC_SUCCESS);
2678 }
2679 
2680 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
2681 {
2682   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
2683   Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;
2684 
2685   PetscFunctionBegin;
2686   if (!Y->preallocated) {
2687     PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
2688   } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
2689     PetscInt nonew = aij->nonew;
2690     PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
2691     aij->nonew = nonew;
2692   }
2693   PetscCall(MatShift_Basic(Y, a));
2694   PetscFunctionReturn(PETSC_SUCCESS);
2695 }
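
/* Illustrative sketch (not part of the original source): MatShift() adds a scalar to the diagonal;
   the implementation above makes sure maij->A has at least one slot per row before doing so.
   The matrix A is hypothetical.

     PetscCall(MatShift(A, 2.0)); // A <- A + 2.0*I, allocating diagonal entries if needed
*/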
2696 
2697 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2698 {
2699   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2700 
2701   PetscFunctionBegin;
2702   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2703   PetscCall(MatMissingDiagonal(a->A, missing, d));
2704   if (d) {
2705     PetscInt rstart;
2706     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2707     *d += rstart;
2708   }
2709   PetscFunctionReturn(PETSC_SUCCESS);
2710 }
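
/* Illustrative sketch (not part of the original source): checking whether a square MATMPIAIJ has any
   structurally missing diagonal entries in its local rows. A is hypothetical.

     PetscBool missing;
     PetscInt  row;
     PetscCall(MatMissingDiagonal(A, &missing, &row)); // row = global index of a missing diagonal, if any
*/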
2711 
2712 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2713 {
2714   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2715 
2716   PetscFunctionBegin;
2717   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2718   PetscFunctionReturn(PETSC_SUCCESS);
2719 }
2720 
2721 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2722 {
2723   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2724 
2725   PetscFunctionBegin;
2726   PetscCall(MatEliminateZeros(a->A));
2727   PetscCall(MatEliminateZeros(a->B));
2728   PetscFunctionReturn(PETSC_SUCCESS);
2729 }
2730 
2731 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2732                                        MatGetRow_MPIAIJ,
2733                                        MatRestoreRow_MPIAIJ,
2734                                        MatMult_MPIAIJ,
2735                                        /* 4*/ MatMultAdd_MPIAIJ,
2736                                        MatMultTranspose_MPIAIJ,
2737                                        MatMultTransposeAdd_MPIAIJ,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                        /*10*/ NULL,
2742                                        NULL,
2743                                        NULL,
2744                                        MatSOR_MPIAIJ,
2745                                        MatTranspose_MPIAIJ,
2746                                        /*15*/ MatGetInfo_MPIAIJ,
2747                                        MatEqual_MPIAIJ,
2748                                        MatGetDiagonal_MPIAIJ,
2749                                        MatDiagonalScale_MPIAIJ,
2750                                        MatNorm_MPIAIJ,
2751                                        /*20*/ MatAssemblyBegin_MPIAIJ,
2752                                        MatAssemblyEnd_MPIAIJ,
2753                                        MatSetOption_MPIAIJ,
2754                                        MatZeroEntries_MPIAIJ,
2755                                        /*24*/ MatZeroRows_MPIAIJ,
2756                                        NULL,
2757                                        NULL,
2758                                        NULL,
2759                                        NULL,
2760                                        /*29*/ MatSetUp_MPI_Hash,
2761                                        NULL,
2762                                        NULL,
2763                                        MatGetDiagonalBlock_MPIAIJ,
2764                                        NULL,
2765                                        /*34*/ MatDuplicate_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                        NULL,
2769                                        NULL,
2770                                        /*39*/ MatAXPY_MPIAIJ,
2771                                        MatCreateSubMatrices_MPIAIJ,
2772                                        MatIncreaseOverlap_MPIAIJ,
2773                                        MatGetValues_MPIAIJ,
2774                                        MatCopy_MPIAIJ,
2775                                        /*44*/ MatGetRowMax_MPIAIJ,
2776                                        MatScale_MPIAIJ,
2777                                        MatShift_MPIAIJ,
2778                                        MatDiagonalSet_MPIAIJ,
2779                                        MatZeroRowsColumns_MPIAIJ,
2780                                        /*49*/ MatSetRandom_MPIAIJ,
2781                                        MatGetRowIJ_MPIAIJ,
2782                                        MatRestoreRowIJ_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2786                                        NULL,
2787                                        MatSetUnfactored_MPIAIJ,
2788                                        MatPermute_MPIAIJ,
2789                                        NULL,
2790                                        /*59*/ MatCreateSubMatrix_MPIAIJ,
2791                                        MatDestroy_MPIAIJ,
2792                                        MatView_MPIAIJ,
2793                                        NULL,
2794                                        NULL,
2795                                        /*64*/ NULL,
2796                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2797                                        NULL,
2798                                        NULL,
2799                                        NULL,
2800                                        /*69*/ MatGetRowMaxAbs_MPIAIJ,
2801                                        MatGetRowMinAbs_MPIAIJ,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                        NULL,
2806                                        /*75*/ MatFDColoringApply_AIJ,
2807                                        MatSetFromOptions_MPIAIJ,
2808                                        NULL,
2809                                        NULL,
2810                                        MatFindZeroDiagonals_MPIAIJ,
2811                                        /*80*/ NULL,
2812                                        NULL,
2813                                        NULL,
2814                                        /*83*/ MatLoad_MPIAIJ,
2815                                        MatIsSymmetric_MPIAIJ,
2816                                        NULL,
2817                                        NULL,
2818                                        NULL,
2819                                        NULL,
2820                                        /*89*/ NULL,
2821                                        NULL,
2822                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2823                                        NULL,
2824                                        NULL,
2825                                        /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2826                                        NULL,
2827                                        NULL,
2828                                        NULL,
2829                                        MatBindToCPU_MPIAIJ,
2830                                        /*99*/ MatProductSetFromOptions_MPIAIJ,
2831                                        NULL,
2832                                        NULL,
2833                                        MatConjugate_MPIAIJ,
2834                                        NULL,
2835                                        /*104*/ MatSetValuesRow_MPIAIJ,
2836                                        MatRealPart_MPIAIJ,
2837                                        MatImaginaryPart_MPIAIJ,
2838                                        NULL,
2839                                        NULL,
2840                                        /*109*/ NULL,
2841                                        NULL,
2842                                        MatGetRowMin_MPIAIJ,
2843                                        NULL,
2844                                        MatMissingDiagonal_MPIAIJ,
2845                                        /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
2846                                        NULL,
2847                                        MatGetGhosts_MPIAIJ,
2848                                        NULL,
2849                                        NULL,
2850                                        /*119*/ MatMultDiagonalBlock_MPIAIJ,
2851                                        NULL,
2852                                        NULL,
2853                                        NULL,
2854                                        MatGetMultiProcBlock_MPIAIJ,
2855                                        /*124*/ MatFindNonzeroRows_MPIAIJ,
2856                                        MatGetColumnReductions_MPIAIJ,
2857                                        MatInvertBlockDiagonal_MPIAIJ,
2858                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2859                                        MatCreateSubMatricesMPI_MPIAIJ,
2860                                        /*129*/ NULL,
2861                                        NULL,
2862                                        NULL,
2863                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2864                                        NULL,
2865                                        /*134*/ NULL,
2866                                        NULL,
2867                                        NULL,
2868                                        NULL,
2869                                        NULL,
2870                                        /*139*/ MatSetBlockSizes_MPIAIJ,
2871                                        NULL,
2872                                        NULL,
2873                                        MatFDColoringSetUp_MPIXAIJ,
2874                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2875                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2876                                        /*145*/ NULL,
2877                                        NULL,
2878                                        NULL,
2879                                        MatCreateGraph_Simple_AIJ,
2880                                        NULL,
2881                                        /*150*/ NULL,
2882                                        MatEliminateZeros_MPIAIJ};
2883 
2884 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2885 {
2886   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2887 
2888   PetscFunctionBegin;
2889   PetscCall(MatStoreValues(aij->A));
2890   PetscCall(MatStoreValues(aij->B));
2891   PetscFunctionReturn(PETSC_SUCCESS);
2892 }
2893 
2894 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2895 {
2896   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2897 
2898   PetscFunctionBegin;
2899   PetscCall(MatRetrieveValues(aij->A));
2900   PetscCall(MatRetrieveValues(aij->B));
2901   PetscFunctionReturn(PETSC_SUCCESS);
2902 }
2903 
2904 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2905 {
2906   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2907   PetscMPIInt size;
2908 
2909   PetscFunctionBegin;
2910   if (B->hash_active) {
2911     PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
2912     B->hash_active = PETSC_FALSE;
2913   }
2914   PetscCall(PetscLayoutSetUp(B->rmap));
2915   PetscCall(PetscLayoutSetUp(B->cmap));
2916 
2917 #if defined(PETSC_USE_CTABLE)
2918   PetscCall(PetscHMapIDestroy(&b->colmap));
2919 #else
2920   PetscCall(PetscFree(b->colmap));
2921 #endif
2922   PetscCall(PetscFree(b->garray));
2923   PetscCall(VecDestroy(&b->lvec));
2924   PetscCall(VecScatterDestroy(&b->Mvctx));
2925 
2926   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
2927   PetscCall(MatDestroy(&b->B));
2928   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
2929   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
2930   PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
2931   PetscCall(MatSetType(b->B, MATSEQAIJ));
2932 
2933   PetscCall(MatDestroy(&b->A));
2934   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
2935   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
2936   PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
2937   PetscCall(MatSetType(b->A, MATSEQAIJ));
2938 
2939   PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
2940   PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
2941   B->preallocated  = PETSC_TRUE;
2942   B->was_assembled = PETSC_FALSE;
2943   B->assembled     = PETSC_FALSE;
2944   PetscFunctionReturn(PETSC_SUCCESS);
2945 }
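
/* Illustrative sketch (not part of the original source): the usual calling sequence that reaches
   MatMPIAIJSetPreallocation_MPIAIJ() above, preallocating about 5 nonzeros per row in the diagonal
   block and 2 in the off-diagonal block. The sizes are hypothetical.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/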
2946 
2947 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2948 {
2949   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2950 
2951   PetscFunctionBegin;
2952   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
2953   PetscCall(PetscLayoutSetUp(B->rmap));
2954   PetscCall(PetscLayoutSetUp(B->cmap));
2955 
2956 #if defined(PETSC_USE_CTABLE)
2957   PetscCall(PetscHMapIDestroy(&b->colmap));
2958 #else
2959   PetscCall(PetscFree(b->colmap));
2960 #endif
2961   PetscCall(PetscFree(b->garray));
2962   PetscCall(VecDestroy(&b->lvec));
2963   PetscCall(VecScatterDestroy(&b->Mvctx));
2964 
2965   PetscCall(MatResetPreallocation(b->A));
2966   PetscCall(MatResetPreallocation(b->B));
2967   B->preallocated  = PETSC_TRUE;
2968   B->was_assembled = PETSC_FALSE;
2969   B->assembled     = PETSC_FALSE;
2970   PetscFunctionReturn(PETSC_SUCCESS);
2971 }
2972 
2973 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2974 {
2975   Mat         mat;
2976   Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;
2977 
2978   PetscFunctionBegin;
2979   *newmat = NULL;
2980   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2981   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2982   PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
2983   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2984   a = (Mat_MPIAIJ *)mat->data;
2985 
2986   mat->factortype   = matin->factortype;
2987   mat->assembled    = matin->assembled;
2988   mat->insertmode   = NOT_SET_VALUES;
2989   mat->preallocated = matin->preallocated;
2990 
2991   a->size         = oldmat->size;
2992   a->rank         = oldmat->rank;
2993   a->donotstash   = oldmat->donotstash;
2994   a->roworiented  = oldmat->roworiented;
2995   a->rowindices   = NULL;
2996   a->rowvalues    = NULL;
2997   a->getrowactive = PETSC_FALSE;
2998 
2999   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
3000   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3001 
3002   if (oldmat->colmap) {
3003 #if defined(PETSC_USE_CTABLE)
3004     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
3005 #else
3006     PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
3007     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
3008 #endif
3009   } else a->colmap = NULL;
3010   if (oldmat->garray) {
3011     PetscInt len;
3012     len = oldmat->B->cmap->n;
3013     PetscCall(PetscMalloc1(len + 1, &a->garray));
3014     if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3015   } else a->garray = NULL;
3016 
3017   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
3018      in fact, MatDuplicate() only requires the matrix to be preallocated.
3019      This may happen inside a DMCreateMatrix_Shell() */
3020   if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
3021   if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
3022   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
3023   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
3024   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
3025   *newmat = mat;
3026   PetscFunctionReturn(PETSC_SUCCESS);
3027 }
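
/* Illustrative sketch (not part of the original source): duplicating a preallocated or assembled
   MATMPIAIJ. A is hypothetical.

     Mat B;
     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B)); // or MAT_DO_NOT_COPY_VALUES / MAT_SHARE_NONZERO_PATTERN
     PetscCall(MatDestroy(&B));
*/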
3028 
3029 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3030 {
3031   PetscBool isbinary, ishdf5;
3032 
3033   PetscFunctionBegin;
3034   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3035   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3036   /* force binary viewer to load .info file if it has not yet done so */
3037   PetscCall(PetscViewerSetUp(viewer));
3038   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3039   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3040   if (isbinary) {
3041     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3042   } else if (ishdf5) {
3043 #if defined(PETSC_HAVE_HDF5)
3044     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3045 #else
3046     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3047 #endif
3048   } else {
3049     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3050   }
3051   PetscFunctionReturn(PETSC_SUCCESS);
3052 }
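
/* Illustrative sketch (not part of the original source): loading a MATMPIAIJ matrix from a PETSc
   binary file, which takes the MatLoad_MPIAIJ_Binary() path below. The file name is hypothetical.

     Mat         A;
     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetType(A, MATMPIAIJ));
     PetscCall(MatLoad(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/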
3053 
3054 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3055 {
3056   PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
3057   PetscInt    *rowidxs, *colidxs;
3058   PetscScalar *matvals;
3059 
3060   PetscFunctionBegin;
3061   PetscCall(PetscViewerSetUp(viewer));
3062 
3063   /* read in matrix header */
3064   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
3065   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
3066   M  = header[1];
3067   N  = header[2];
3068   nz = header[3];
3069   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
3070   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
3071   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");
3072 
3073   /* set block sizes from the viewer's .info file */
3074   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3075   /* set global sizes if not set already */
3076   if (mat->rmap->N < 0) mat->rmap->N = M;
3077   if (mat->cmap->N < 0) mat->cmap->N = N;
3078   PetscCall(PetscLayoutSetUp(mat->rmap));
3079   PetscCall(PetscLayoutSetUp(mat->cmap));
3080 
3081   /* check if the matrix sizes are correct */
3082   PetscCall(MatGetSize(mat, &rows, &cols));
3083   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
3084 
3085   /* read in row lengths and build row indices */
3086   PetscCall(MatGetLocalSize(mat, &m, NULL));
3087   PetscCall(PetscMalloc1(m + 1, &rowidxs));
3088   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
3089   rowidxs[0] = 0;
3090   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3091   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
3092   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3093   /* read in column indices and matrix values */
3094   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
3095   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
3096   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3097   /* store matrix indices and values */
3098   PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
3099   PetscCall(PetscFree(rowidxs));
3100   PetscCall(PetscFree2(colidxs, matvals));
3101   PetscFunctionReturn(PETSC_SUCCESS);
3102 }
3103 
3104 /* Not scalable because of ISAllGather() unless getting all columns. */
3105 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3106 {
3107   IS          iscol_local;
3108   PetscBool   isstride;
3109   PetscMPIInt lisstride = 0, gisstride;
3110 
3111   PetscFunctionBegin;
3112   /* check if we are grabbing all columns */
3113   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3114 
3115   if (isstride) {
3116     PetscInt start, len, mstart, mlen;
3117     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3118     PetscCall(ISGetLocalSize(iscol, &len));
3119     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3120     if (mstart == start && mlen - mstart == len) lisstride = 1;
3121   }
3122 
3123   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3124   if (gisstride) {
3125     PetscInt N;
3126     PetscCall(MatGetSize(mat, NULL, &N));
3127     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3128     PetscCall(ISSetIdentity(iscol_local));
3129     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3130   } else {
3131     PetscInt cbs;
3132     PetscCall(ISGetBlockSize(iscol, &cbs));
3133     PetscCall(ISAllGather(iscol, &iscol_local));
3134     PetscCall(ISSetBlockSize(iscol_local, cbs));
3135   }
3136 
3137   *isseq = iscol_local;
3138   PetscFunctionReturn(PETSC_SUCCESS);
3139 }
3140 
3141 /*
3142  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3143  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3144 
3145  Input Parameters:
3146 +   mat - matrix
3147 .   isrow - parallel row index set; its local indices are a subset of the local rows of `mat`,
3148            i.e., mat->rstart <= isrow[i] < mat->rend
3149 -   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
3150            i.e., mat->cstart <= iscol[i] < mat->cend
3151 
3152  Output Parameters:
3153 +   isrow_d - sequential row index set for retrieving mat->A
3154 .   iscol_d - sequential column index set for retrieving mat->A
3155 .   iscol_o - sequential column index set for retrieving mat->B
3156 -   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
3157  */
3158 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
3159 {
3160   Vec             x, cmap;
3161   const PetscInt *is_idx;
3162   PetscScalar    *xarray, *cmaparray;
3163   PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
3164   Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
3165   Mat             B    = a->B;
3166   Vec             lvec = a->lvec, lcmap;
3167   PetscInt        i, cstart, cend, Bn = B->cmap->N;
3168   MPI_Comm        comm;
3169   VecScatter      Mvctx = a->Mvctx;
3170 
3171   PetscFunctionBegin;
3172   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3173   PetscCall(ISGetLocalSize(iscol, &ncols));
3174 
3175   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3176   PetscCall(MatCreateVecs(mat, &x, NULL));
3177   PetscCall(VecSet(x, -1.0));
3178   PetscCall(VecDuplicate(x, &cmap));
3179   PetscCall(VecSet(cmap, -1.0));
3180 
3181   /* Get start indices */
3182   PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
3183   isstart -= ncols;
3184   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
3185 
3186   PetscCall(ISGetIndices(iscol, &is_idx));
3187   PetscCall(VecGetArray(x, &xarray));
3188   PetscCall(VecGetArray(cmap, &cmaparray));
3189   PetscCall(PetscMalloc1(ncols, &idx));
3190   for (i = 0; i < ncols; i++) {
3191     xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
3192     cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
3193     idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
3194   }
3195   PetscCall(VecRestoreArray(x, &xarray));
3196   PetscCall(VecRestoreArray(cmap, &cmaparray));
3197   PetscCall(ISRestoreIndices(iscol, &is_idx));
3198 
3199   /* Get iscol_d */
3200   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
3201   PetscCall(ISGetBlockSize(iscol, &i));
3202   PetscCall(ISSetBlockSize(*iscol_d, i));
3203 
3204   /* Get isrow_d */
3205   PetscCall(ISGetLocalSize(isrow, &m));
3206   rstart = mat->rmap->rstart;
3207   PetscCall(PetscMalloc1(m, &idx));
3208   PetscCall(ISGetIndices(isrow, &is_idx));
3209   for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
3210   PetscCall(ISRestoreIndices(isrow, &is_idx));
3211 
3212   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
3213   PetscCall(ISGetBlockSize(isrow, &i));
3214   PetscCall(ISSetBlockSize(*isrow_d, i));
3215 
3216   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3217   PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3218   PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
3219 
3220   PetscCall(VecDuplicate(lvec, &lcmap));
3221 
3222   PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3223   PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
3224 
3225   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3226   /* off-process column indices */
3227   count = 0;
3228   PetscCall(PetscMalloc1(Bn, &idx));
3229   PetscCall(PetscMalloc1(Bn, &cmap1));
3230 
3231   PetscCall(VecGetArray(lvec, &xarray));
3232   PetscCall(VecGetArray(lcmap, &cmaparray));
3233   for (i = 0; i < Bn; i++) {
3234     if (PetscRealPart(xarray[i]) > -1.0) {
3235       idx[count]   = i;                                     /* local column index in off-diagonal part B */
3236       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
3237       count++;
3238     }
3239   }
3240   PetscCall(VecRestoreArray(lvec, &xarray));
3241   PetscCall(VecRestoreArray(lcmap, &cmaparray));
3242 
3243   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
3244   /* cannot ensure iscol_o has same blocksize as iscol! */
3245 
3246   PetscCall(PetscFree(idx));
3247   *garray = cmap1;
3248 
3249   PetscCall(VecDestroy(&x));
3250   PetscCall(VecDestroy(&cmap));
3251   PetscCall(VecDestroy(&lcmap));
3252   PetscFunctionReturn(PETSC_SUCCESS);
3253 }
3254 
3255 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
3257 {
3258   Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
3259   Mat         M = NULL;
3260   MPI_Comm    comm;
3261   IS          iscol_d, isrow_d, iscol_o;
3262   Mat         Asub = NULL, Bsub = NULL;
3263   PetscInt    n;
3264 
3265   PetscFunctionBegin;
3266   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3267 
3268   if (call == MAT_REUSE_MATRIX) {
3269     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3270     PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
3271     PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
3272 
3273     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
3274     PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");
3275 
3276     PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
3277     PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");
3278 
3279     /* Update diagonal and off-diagonal portions of submat */
3280     asub = (Mat_MPIAIJ *)(*submat)->data;
3281     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
3282     PetscCall(ISGetLocalSize(iscol_o, &n));
3283     if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
3284     PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
3285     PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));
3286 
3287   } else { /* call == MAT_INITIAL_MATRIX) */
3288     const PetscInt *garray;
3289     PetscInt        BsubN;
3290 
3291     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3292     PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));
3293 
3294     /* Create local submatrices Asub and Bsub */
3295     PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
3296     PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));
3297 
3298     /* Create submatrix M */
3299     PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));
3300 
3301     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3302     asub = (Mat_MPIAIJ *)M->data;
3303 
3304     PetscCall(ISGetLocalSize(iscol_o, &BsubN));
3305     n = asub->B->cmap->N;
3306     if (BsubN > n) {
3307       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3308       const PetscInt *idx;
3309       PetscInt        i, j, *idx_new, *subgarray = asub->garray;
3310       PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));
3311 
3312       PetscCall(PetscMalloc1(n, &idx_new));
3313       j = 0;
3314       PetscCall(ISGetIndices(iscol_o, &idx));
3315       for (i = 0; i < n; i++) {
3316         if (j >= BsubN) break;
3317         while (subgarray[i] > garray[j]) j++;
3318 
3319         if (subgarray[i] == garray[j]) {
3320           idx_new[i] = idx[j++];
3321         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot be less than garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
3322       }
3323       PetscCall(ISRestoreIndices(iscol_o, &idx));
3324 
3325       PetscCall(ISDestroy(&iscol_o));
3326       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));
3327 
3328     } else if (BsubN < n) {
3329       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
3330     }
3331 
3332     PetscCall(PetscFree(garray));
3333     *submat = M;
3334 
3335     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3336     PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
3337     PetscCall(ISDestroy(&isrow_d));
3338 
3339     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
3340     PetscCall(ISDestroy(&iscol_d));
3341 
3342     PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
3343     PetscCall(ISDestroy(&iscol_o));
3344   }
3345   PetscFunctionReturn(PETSC_SUCCESS);
3346 }
3347 
3348 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
3349 {
3350   IS        iscol_local = NULL, isrow_d;
3351   PetscInt  csize;
3352   PetscInt  n, i, j, start, end;
3353   PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
3354   MPI_Comm  comm;
3355 
3356   PetscFunctionBegin;
3357   /* If isrow has same processor distribution as mat,
3358      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3359   if (call == MAT_REUSE_MATRIX) {
3360     PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
3361     if (isrow_d) {
3362       sameRowDist  = PETSC_TRUE;
3363       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3364     } else {
3365       PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
3366       if (iscol_local) {
3367         sameRowDist  = PETSC_TRUE;
3368         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3369       }
3370     }
3371   } else {
3372     /* Check if isrow has same processor distribution as mat */
3373     sameDist[0] = PETSC_FALSE;
3374     PetscCall(ISGetLocalSize(isrow, &n));
3375     if (!n) {
3376       sameDist[0] = PETSC_TRUE;
3377     } else {
3378       PetscCall(ISGetMinMax(isrow, &i, &j));
3379       PetscCall(MatGetOwnershipRange(mat, &start, &end));
3380       if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
3381     }
3382 
3383     /* Check if iscol has same processor distribution as mat */
3384     sameDist[1] = PETSC_FALSE;
3385     PetscCall(ISGetLocalSize(iscol, &n));
3386     if (!n) {
3387       sameDist[1] = PETSC_TRUE;
3388     } else {
3389       PetscCall(ISGetMinMax(iscol, &i, &j));
3390       PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
3391       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3392     }
3393 
3394     PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3395     PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
3396     sameRowDist = tsameDist[0];
3397   }
3398 
3399   if (sameRowDist) {
3400     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3401       /* isrow and iscol have same processor distribution as mat */
3402       PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
3403       PetscFunctionReturn(PETSC_SUCCESS);
3404     } else { /* sameRowDist */
3405       /* isrow has same processor distribution as mat */
3406       if (call == MAT_INITIAL_MATRIX) {
3407         PetscBool sorted;
3408         PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3409         PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
3410         PetscCall(ISGetSize(iscol, &i));
3411         PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);
3412 
3413         PetscCall(ISSorted(iscol_local, &sorted));
3414         if (sorted) {
3415           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it may contain duplicate indices */
3416           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
3417           PetscFunctionReturn(PETSC_SUCCESS);
3418         }
3419       } else { /* call == MAT_REUSE_MATRIX */
3420         IS iscol_sub;
3421         PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3422         if (iscol_sub) {
3423           PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
3424           PetscFunctionReturn(PETSC_SUCCESS);
3425         }
3426       }
3427     }
3428   }
3429 
3430   /* General case: iscol -> iscol_local which has global size of iscol */
3431   if (call == MAT_REUSE_MATRIX) {
3432     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
3433     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3434   } else {
3435     if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
3436   }
3437 
3438   PetscCall(ISGetLocalSize(iscol, &csize));
3439   PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));
3440 
3441   if (call == MAT_INITIAL_MATRIX) {
3442     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
3443     PetscCall(ISDestroy(&iscol_local));
3444   }
3445   PetscFunctionReturn(PETSC_SUCCESS);
3446 }
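
/* Illustrative sketch (not part of the original source): extracting a parallel submatrix through the
   public MatCreateSubMatrix() interface, which dispatches to MatCreateSubMatrix_MPIAIJ() above; when
   isrow/iscol follow the ownership of mat, the SameRowColDist path is taken. This assumes a square
   matrix mat whose row and column ownership ranges coincide; all names are hypothetical.

     IS       isrow, iscol;
     Mat      sub;
     PetscInt rstart, rend;
     PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &isrow));
     PetscCall(ISCreateStride(PETSC_COMM_WORLD, rend - rstart, rstart, 1, &iscol));
     PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
     // later: MatCreateSubMatrix(mat, isrow, iscol, MAT_REUSE_MATRIX, &sub) refreshes the values
     PetscCall(ISDestroy(&isrow));
     PetscCall(ISDestroy(&iscol));
     PetscCall(MatDestroy(&sub));
*/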
3447 
3448 /*@C
3449      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3450          and "off-diagonal" part of the matrix in CSR format.
3451 
3452    Collective
3453 
3454    Input Parameters:
3455 +  comm - MPI communicator
3456 .  A - "diagonal" portion of matrix
3457 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3458 -  garray - global index of `B` columns
3459 
3460    Output Parameter:
3461 .   mat - the matrix, with input `A` as its local diagonal matrix
3462 
3463   Level: advanced
3464 
3465    Notes:
3466    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3467 
3468    `A` becomes part of the output `mat` and `B` is destroyed by this routine, so the caller must not use `A` or `B` afterwards.
3469 
3470 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3471 @*/
3472 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
3473 {
3474   Mat_MPIAIJ        *maij;
3475   Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
3476   PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
3477   const PetscScalar *oa;
3478   Mat                Bnew;
3479   PetscInt           m, n, N;
3480   MatType            mpi_mat_type;
3481 
3482   PetscFunctionBegin;
3483   PetscCall(MatCreate(comm, mat));
3484   PetscCall(MatGetSize(A, &m, &n));
3485   PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
3486   PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
3487   /* check below removed: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3488   /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */
3489 
3490   /* Get global columns of mat */
3491   PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));
3492 
3493   PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
3494   /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
3495   PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
3496   PetscCall(MatSetType(*mat, mpi_mat_type));
3497 
3498   PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
3499   maij = (Mat_MPIAIJ *)(*mat)->data;
3500 
3501   (*mat)->preallocated = PETSC_TRUE;
3502 
3503   PetscCall(PetscLayoutSetUp((*mat)->rmap));
3504   PetscCall(PetscLayoutSetUp((*mat)->cmap));
3505 
3506   /* Set A as diagonal portion of *mat */
3507   maij->A = A;
3508 
3509   nz = oi[m];
3510   for (i = 0; i < nz; i++) {
3511     col   = oj[i];
3512     oj[i] = garray[col];
3513   }
3514 
3515   /* Set Bnew as off-diagonal portion of *mat */
3516   PetscCall(MatSeqAIJGetArrayRead(B, &oa));
3517   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
3518   PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
3519   bnew        = (Mat_SeqAIJ *)Bnew->data;
3520   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3521   maij->B     = Bnew;
3522 
3523   PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);
3524 
3525   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3526   b->free_a       = PETSC_FALSE;
3527   b->free_ij      = PETSC_FALSE;
3528   PetscCall(MatDestroy(&B));
3529 
3530   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3531   bnew->free_a       = PETSC_TRUE;
3532   bnew->free_ij      = PETSC_TRUE;
3533 
3534   /* condense columns of maij->B */
3535   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3536   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
3537   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3538   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
3539   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3540   PetscFunctionReturn(PETSC_SUCCESS);
3541 }
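
/* Illustrative sketch (not part of the original source): assembling an MPIAIJ matrix from sequential
   pieces already built by the caller. Ad is the local diagonal block, Ao holds the off-diagonal
   entries with compressed local column indices, and garray[j] gives the global column of Ao's
   column j. All names and the construction of Ad/Ao/garray are hypothetical.

     Mat       Ad, Ao, C;
     PetscInt *garray;
     // ... build Ad and Ao with MatCreateSeqAIJ()/MatSetValues(), and fill garray ...
     PetscCall(MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD, Ad, Ao, garray, &C));
     // Ad and Ao now belong to C (Ao is destroyed internally); do not use or destroy them afterwards
*/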
3542 
3543 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3544 
3545 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3546 {
3547   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3548   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3549   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3550   Mat             M, Msub, B = a->B;
3551   MatScalar      *aa;
3552   Mat_SeqAIJ     *aij;
3553   PetscInt       *garray = a->garray, *colsub, Ncols;
3554   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3555   IS              iscol_sub, iscmap;
3556   const PetscInt *is_idx, *cmap;
3557   PetscBool       allcolumns = PETSC_FALSE;
3558   MPI_Comm        comm;
3559 
3560   PetscFunctionBegin;
3561   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3562   if (call == MAT_REUSE_MATRIX) {
3563     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3564     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3565     PetscCall(ISGetLocalSize(iscol_sub, &count));
3566 
3567     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3568     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3569 
3570     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3571     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3572 
3573     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3574 
3575   } else { /* call == MAT_INITIAL_MATRIX) */
3576     PetscBool flg;
3577 
3578     PetscCall(ISGetLocalSize(iscol, &n));
3579     PetscCall(ISGetSize(iscol, &Ncols));
3580 
3581     /* (1) iscol -> nonscalable iscol_local */
3582     /* Check for special case: each processor gets entire matrix columns */
3583     PetscCall(ISIdentity(iscol_local, &flg));
3584     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3585     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3586     if (allcolumns) {
3587       iscol_sub = iscol_local;
3588       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3589       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3590 
3591     } else {
3592       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it may contain duplicate indices */
3593       PetscInt *idx, *cmap1, k;
3594       PetscCall(PetscMalloc1(Ncols, &idx));
3595       PetscCall(PetscMalloc1(Ncols, &cmap1));
3596       PetscCall(ISGetIndices(iscol_local, &is_idx));
3597       count = 0;
3598       k     = 0;
3599       for (i = 0; i < Ncols; i++) {
3600         j = is_idx[i];
3601         if (j >= cstart && j < cend) {
3602           /* diagonal part of mat */
3603           idx[count]     = j;
3604           cmap1[count++] = i; /* column index in submat */
3605         } else if (Bn) {
3606           /* off-diagonal part of mat */
3607           if (j == garray[k]) {
3608             idx[count]     = j;
3609             cmap1[count++] = i; /* column index in submat */
3610           } else if (j > garray[k]) {
3611             while (j > garray[k] && k < Bn - 1) k++;
3612             if (j == garray[k]) {
3613               idx[count]     = j;
3614               cmap1[count++] = i; /* column index in submat */
3615             }
3616           }
3617         }
3618       }
3619       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3620 
3621       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3622       PetscCall(ISGetBlockSize(iscol, &cbs));
3623       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3624 
3625       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3626     }
3627 
3628     /* (3) Create sequential Msub */
3629     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3630   }
3631 
3632   PetscCall(ISGetLocalSize(iscol_sub, &count));
3633   aij = (Mat_SeqAIJ *)(Msub)->data;
3634   ii  = aij->i;
3635   PetscCall(ISGetIndices(iscmap, &cmap));
3636 
3637   /*
3638       m - number of local rows
3639       Ncols - number of columns (same on all processors)
3640       rstart - first row in new global matrix generated
3641   */
3642   PetscCall(MatGetSize(Msub, &m, NULL));
3643 
3644   if (call == MAT_INITIAL_MATRIX) {
3645     /* (4) Create parallel newmat */
3646     PetscMPIInt rank, size;
3647     PetscInt    csize;
3648 
3649     PetscCallMPI(MPI_Comm_size(comm, &size));
3650     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3651 
3652     /*
3653         Determine the number of non-zeros in the diagonal and off-diagonal
3654         portions of the matrix in order to do correct preallocation
3655     */
3656 
3657     /* first get start and end of "diagonal" columns */
3658     PetscCall(ISGetLocalSize(iscol, &csize));
3659     if (csize == PETSC_DECIDE) {
3660       PetscCall(ISGetSize(isrow, &mglobal));
3661       if (mglobal == Ncols) { /* square matrix */
3662         nlocal = m;
3663       } else {
3664         nlocal = Ncols / size + ((Ncols % size) > rank);
3665       }
3666     } else {
3667       nlocal = csize;
3668     }
3669     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3670     rstart = rend - nlocal;
3671     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3672 
3673     /* next, compute all the lengths */
3674     jj = aij->j;
3675     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3676     olens = dlens + m;
3677     for (i = 0; i < m; i++) {
3678       jend = ii[i + 1] - ii[i];
3679       olen = 0;
3680       dlen = 0;
3681       for (j = 0; j < jend; j++) {
3682         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3683         else dlen++;
3684         jj++;
3685       }
3686       olens[i] = olen;
3687       dlens[i] = dlen;
3688     }
3689 
3690     PetscCall(ISGetBlockSize(isrow, &bs));
3691     PetscCall(ISGetBlockSize(iscol, &cbs));
3692 
3693     PetscCall(MatCreate(comm, &M));
3694     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3695     PetscCall(MatSetBlockSizes(M, bs, cbs));
3696     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3697     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3698     PetscCall(PetscFree(dlens));
3699 
3700   } else { /* call == MAT_REUSE_MATRIX */
3701     M = *newmat;
3702     PetscCall(MatGetLocalSize(M, &i, NULL));
3703     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3704     PetscCall(MatZeroEntries(M));
3705     /*
3706          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3707        rather than the slower MatSetValues().
3708     */
3709     M->was_assembled = PETSC_TRUE;
3710     M->assembled     = PETSC_FALSE;
3711   }
3712 
3713   /* (5) Set values of Msub to *newmat */
3714   PetscCall(PetscMalloc1(count, &colsub));
3715   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3716 
3717   jj = aij->j;
3718   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3719   for (i = 0; i < m; i++) {
3720     row = rstart + i;
3721     nz  = ii[i + 1] - ii[i];
3722     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3723     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3724     jj += nz;
3725     aa += nz;
3726   }
3727   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3728   PetscCall(ISRestoreIndices(iscmap, &cmap));
3729 
3730   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3731   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3732 
3733   PetscCall(PetscFree(colsub));
3734 
3735   /* save Msub, iscol_sub and iscmap used in processor for next request */
3736   if (call == MAT_INITIAL_MATRIX) {
3737     *newmat = M;
3738     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3739     PetscCall(MatDestroy(&Msub));
3740 
3741     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3742     PetscCall(ISDestroy(&iscol_sub));
3743 
3744     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3745     PetscCall(ISDestroy(&iscmap));
3746 
3747     if (iscol_local) {
3748       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3749       PetscCall(ISDestroy(&iscol_local));
3750     }
3751   }
3752   PetscFunctionReturn(PETSC_SUCCESS);
3753 }
3754 
3755 /*
3756     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
3757   on each process, and then the final result by concatenating the local matrices.
3758   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3759 
3760   This requires a sequential iscol with all indices.
3761 */
3762 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
3763 {
3764   PetscMPIInt rank, size;
3765   PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
3766   PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3767   Mat         M, Mreuse;
3768   MatScalar  *aa, *vwork;
3769   MPI_Comm    comm;
3770   Mat_SeqAIJ *aij;
3771   PetscBool   colflag, allcolumns = PETSC_FALSE;
3772 
3773   PetscFunctionBegin;
3774   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3775   PetscCallMPI(MPI_Comm_rank(comm, &rank));
3776   PetscCallMPI(MPI_Comm_size(comm, &size));
3777 
3778   /* Check for special case: each processor gets entire matrix columns */
3779   PetscCall(ISIdentity(iscol, &colflag));
3780   PetscCall(ISGetLocalSize(iscol, &n));
3781   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3782   PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3783 
3784   if (call == MAT_REUSE_MATRIX) {
3785     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
3786     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3787     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
3788   } else {
3789     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
3790   }
3791 
3792   /*
3793       m - number of local rows
3794       n - number of columns (same on all processors)
3795       rstart - first row in new global matrix generated
3796   */
3797   PetscCall(MatGetSize(Mreuse, &m, &n));
3798   PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
3799   if (call == MAT_INITIAL_MATRIX) {
3800     aij = (Mat_SeqAIJ *)(Mreuse)->data;
3801     ii  = aij->i;
3802     jj  = aij->j;
3803 
3804     /*
3805         Determine the number of non-zeros in the diagonal and off-diagonal
3806         portions of the matrix in order to do correct preallocation
3807     */
3808 
3809     /* first get start and end of "diagonal" columns */
3810     if (csize == PETSC_DECIDE) {
3811       PetscCall(ISGetSize(isrow, &mglobal));
3812       if (mglobal == n) { /* square matrix */
3813         nlocal = m;
3814       } else {
3815         nlocal = n / size + ((n % size) > rank);
3816       }
3817     } else {
3818       nlocal = csize;
3819     }
3820     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3821     rstart = rend - nlocal;
3822     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
3823 
3824     /* next, compute all the lengths */
3825     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3826     olens = dlens + m;
3827     for (i = 0; i < m; i++) {
3828       jend = ii[i + 1] - ii[i];
3829       olen = 0;
3830       dlen = 0;
3831       for (j = 0; j < jend; j++) {
3832         if (*jj < rstart || *jj >= rend) olen++;
3833         else dlen++;
3834         jj++;
3835       }
3836       olens[i] = olen;
3837       dlens[i] = dlen;
3838     }
3839     PetscCall(MatCreate(comm, &M));
3840     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
3841     PetscCall(MatSetBlockSizes(M, bs, cbs));
3842     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3843     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3844     PetscCall(PetscFree(dlens));
3845   } else {
3846     PetscInt ml, nl;
3847 
3848     M = *newmat;
3849     PetscCall(MatGetLocalSize(M, &ml, &nl));
3850     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3851     PetscCall(MatZeroEntries(M));
3852     /*
3853          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3854        rather than the slower MatSetValues().
3855     */
3856     M->was_assembled = PETSC_TRUE;
3857     M->assembled     = PETSC_FALSE;
3858   }
3859   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
3860   aij = (Mat_SeqAIJ *)(Mreuse)->data;
3861   ii  = aij->i;
3862   jj  = aij->j;
3863 
3864   /* trigger copy to CPU if needed */
3865   PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
3866   for (i = 0; i < m; i++) {
3867     row   = rstart + i;
3868     nz    = ii[i + 1] - ii[i];
3869     cwork = jj;
3870     jj += nz;
3871     vwork = aa;
3872     aa += nz;
3873     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
3874   }
3875   PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));
3876 
3877   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3878   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3879   *newmat = M;
3880 
3881   /* save submatrix used in processor for next request */
3882   if (call == MAT_INITIAL_MATRIX) {
3883     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
3884     PetscCall(MatDestroy(&Mreuse));
3885   }
3886   PetscFunctionReturn(PETSC_SUCCESS);
3887 }
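
/*
  Illustrative sketch (hypothetical caller, kept out of the build): the reuse pattern the
  routine above supports through the composed "SubMatrix" object. The function and variable
  names below are examples only, not part of PETSc.
*/
#if 0
static PetscErrorCode ExampleSubMatrixReuse(Mat A, IS isrow, IS iscol)
{
  Mat sub;

  PetscFunctionBegin;
  /* first extraction builds the intermediate sequential matrix and caches it on sub */
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_INITIAL_MATRIX, &sub));
  /* ... change numerical values of A while keeping its nonzero pattern ... */
  /* second extraction reuses the cached intermediate instead of rebuilding it */
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, MAT_REUSE_MATRIX, &sub));
  PetscCall(MatDestroy(&sub));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif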
3888 
3889 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3890 {
3891   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3892   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3893   const PetscInt *JJ;
3894   PetscBool       nooffprocentries;
3895   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3896 
3897   PetscFunctionBegin;
3898   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3899 
3900   PetscCall(PetscLayoutSetUp(B->rmap));
3901   PetscCall(PetscLayoutSetUp(B->cmap));
3902   m      = B->rmap->n;
3903   cstart = B->cmap->rstart;
3904   cend   = B->cmap->rend;
3905   rstart = B->rmap->rstart;
3906 
3907   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3908 
3909   if (PetscDefined(USE_DEBUG)) {
3910     for (i = 0; i < m; i++) {
3911       nnz = Ii[i + 1] - Ii[i];
3912       JJ  = J + Ii[i];
3913       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3914       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3915       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3916     }
3917   }
3918 
3919   for (i = 0; i < m; i++) {
3920     nnz     = Ii[i + 1] - Ii[i];
3921     JJ      = J + Ii[i];
3922     nnz_max = PetscMax(nnz_max, nnz);
3923     d       = 0;
3924     for (j = 0; j < nnz; j++) {
3925       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3926     }
3927     d_nnz[i] = d;
3928     o_nnz[i] = nnz - d;
3929   }
3930   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3931   PetscCall(PetscFree2(d_nnz, o_nnz));
3932 
3933   for (i = 0; i < m; i++) {
3934     ii = i + rstart;
3935     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3936   }
3937   nooffprocentries    = B->nooffprocentries;
3938   B->nooffprocentries = PETSC_TRUE;
3939   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3940   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3941   B->nooffprocentries = nooffprocentries;
3942 
3943   /* count number of entries below block diagonal */
3944   PetscCall(PetscFree(Aij->ld));
3945   PetscCall(PetscCalloc1(m, &ld));
3946   Aij->ld = ld;
3947   for (i = 0; i < m; i++) {
3948     nnz = Ii[i + 1] - Ii[i];
3949     j   = 0;
3950     while (j < nnz && J[j] < cstart) j++;
3951     ld[i] = j;
3952     J += nnz;
3953   }
3954 
3955   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3956   PetscFunctionReturn(PETSC_SUCCESS);
3957 }
3958 
3959 /*@
3960    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3961    (the default parallel PETSc format).
3962 
3963    Collective
3964 
3965    Input Parameters:
3966 +  B - the matrix
3967 .  i - the indices into j for the start of each local row (starts with zero)
3968 .  j - the column indices for each local row (starts with zero)
3969 -  v - optional values in the matrix
3970 
3971    Level: developer
3972 
3973    Notes:
3974        The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
3975      thus you CANNOT change the matrix entries by changing the values of `v` after you have
3976      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3977 
3978        The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
3979 
3980        The format which is used for the sparse matrix input, is equivalent to a
3981        The format used for the sparse matrix input is equivalent to a
3982     row-major ordering, i.e. for the following matrix, the input data expected is
3983 
3984 .vb
3985         1 0 0
3986         2 0 3     P0
3987        -------
3988         4 5 6     P1
3989 
3990      Process0 [P0] rows_owned=[0,1]
3991         i =  {0,1,3}  [size = nrow+1  = 2+1]
3992         j =  {0,0,2}  [size = 3]
3993         v =  {1,2,3}  [size = 3]
3994 
3995      Process1 [P1] rows_owned=[2]
3996         i =  {0,3}    [size = nrow+1  = 1+1]
3997         j =  {0,1,2}  [size = 3]
3998         v =  {4,5,6}  [size = 3]
3999 .ve
4000 
4001 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
4002           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4003 @*/
4004 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
4005 {
4006   PetscFunctionBegin;
4007   PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
4008   PetscFunctionReturn(PETSC_SUCCESS);
4009 }
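
/*
  Illustrative sketch (hypothetical, kept out of the build): assembling the 3x3 matrix from
  the manual page above with MatMPIAIJSetPreallocationCSR(), assuming the communicator has
  exactly two ranks. The function name is an example only.
*/
#if 0
static PetscErrorCode ExampleSetPreallocationCSR(MPI_Comm comm, Mat *B)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatCreate(comm, B));
  PetscCall(MatSetSizes(*B, rank == 0 ? 2 : 1, PETSC_DECIDE, 3, 3));
  PetscCall(MatSetType(*B, MATMPIAIJ));
  if (rank == 0) { /* rows 0 and 1 */
    const PetscInt    i[] = {0, 1, 3}, j[] = {0, 0, 2};
    const PetscScalar v[] = {1, 2, 3};
    PetscCall(MatMPIAIJSetPreallocationCSR(*B, i, j, v));
  } else { /* row 2 */
    const PetscInt    i[] = {0, 3}, j[] = {0, 1, 2};
    const PetscScalar v[] = {4, 5, 6};
    PetscCall(MatMPIAIJSetPreallocationCSR(*B, i, j, v));
  }
  /* the matrix is preallocated, filled with v, and assembled after this call */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif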
4010 
4011 /*@C
4012    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4013    (the default parallel PETSc format).  For good matrix assembly performance
4014    the user should preallocate the matrix storage by setting the parameters
4015    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4016 
4017    Collective
4018 
4019    Input Parameters:
4020 +  B - the matrix
4021 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4022            (same value is used for all local rows)
4023 .  d_nnz - array containing the number of nonzeros in the various rows of the
4024            DIAGONAL portion of the local submatrix (possibly different for each row)
4025            or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
4026            The size of this array is equal to the number of local rows, i.e 'm'.
4027            For matrices that will be factored, you must leave room for (and set)
4028            the diagonal entry even if it is zero.
4029 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4030            submatrix (same value is used for all local rows).
4031 -  o_nnz - array containing the number of nonzeros in the various rows of the
4032            OFF-DIAGONAL portion of the local submatrix (possibly different for
4033            each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
4034            structure. The size of this array is equal to the number
4035            of local rows, i.e 'm'.
4036 
4037    Usage:
4038    Consider the following 8x8 matrix with 34 non-zero values that is
4039    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4040    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4041    as follows
4042 
4043 .vb
4044             1  2  0  |  0  3  0  |  0  4
4045     Proc0   0  5  6  |  7  0  0  |  8  0
4046             9  0 10  | 11  0  0  | 12  0
4047     -------------------------------------
4048            13  0 14  | 15 16 17  |  0  0
4049     Proc1   0 18  0  | 19 20 21  |  0  0
4050             0  0  0  | 22 23  0  | 24  0
4051     -------------------------------------
4052     Proc2  25 26 27  |  0  0 28  | 29  0
4053            30  0  0  | 31 32 33  |  0 34
4054 .ve
4055 
4056    This can be represented as a collection of submatrices as
4057 .vb
4058       A B C
4059       D E F
4060       G H I
4061 .ve
4062 
4063    Where the submatrices A,B,C are owned by proc0, D,E,F are
4064    owned by proc1, G,H,I are owned by proc2.
4065 
4066    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4067    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4068    The 'M','N' parameters are 8,8, and have the same values on all procs.
4069 
4070    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4071    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4072    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4073    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4074    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4075    matrix, and [DF] as another `MATSEQAIJ` matrix.
4076 
4077    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4078    allocated for every row of the local diagonal submatrix, and `o_nz`
4079    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4080    One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
4081    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4082    In this case, the values of `d_nz`, `o_nz` are
4083 .vb
4084      proc0  dnz = 2, o_nz = 2
4085      proc1  dnz = 3, o_nz = 2
4086      proc2  dnz = 1, o_nz = 4
4087 .ve
4088    We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
4089    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4090    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4091    34 values.
4092 
4093    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4094    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4095    In the above case the values for `d_nnz`, `o_nnz` are
4096 .vb
4097      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4098      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4099      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4100 .ve
4101    Here the space allocated is sum of all the above values i.e 34, and
4102    hence pre-allocation is perfect.
4103 
4104    Level: intermediate
4105 
4106    Notes:
4107    If the *_nnz parameter is given then the *_nz parameter is ignored
4108 
4109    The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
4110    storage.  The stored row and column indices begin with zero.
4111    See [Sparse Matrices](sec_matsparse) for details.
4112 
4113    The parallel matrix is partitioned such that the first m0 rows belong to
4114    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4115    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4116 
4117    The DIAGONAL portion of the local submatrix of a processor can be defined
4118    as the submatrix which is obtained by extracting the part corresponding to
4119    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4120    first row that belongs to the processor, r2 is the last row belonging to
4121    this processor, and c1-c2 is the range of indices of the local part of a
4122    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4123    common case of a square matrix, the row and column ranges are the same and
4124    the DIAGONAL part is also square. The remaining portion of the local
4125    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
4126 
4127    If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.
4128 
4129    You can call `MatGetInfo()` to get information on how effective the preallocation was;
4130    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4131    You can also run with the option `-info` and look for messages with the string
4132    malloc in them to see if additional memory allocation was needed.
4133 
4134 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4135           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4136 @*/
4137 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
4138 {
4139   PetscFunctionBegin;
4140   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
4141   PetscValidType(B, 1);
4142   PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
4143   PetscFunctionReturn(PETSC_SUCCESS);
4144 }
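
/*
  Illustrative sketch (hypothetical sizes, kept out of the build): the recommended
  MatCreate()/MatSetSizes()/MatSetType()/MatMPIAIJSetPreallocation() sequence, using the
  scalar d_nz/o_nz form; mlocal and the per-row counts are made-up values.
*/
#if 0
static PetscErrorCode ExamplePreallocateMPIAIJ(MPI_Comm comm, PetscInt mlocal, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, mlocal, mlocal, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(*A, MATMPIAIJ));
  /* at most 3 nonzeros per row in the diagonal block and 2 in the off-diagonal block */
  PetscCall(MatMPIAIJSetPreallocation(*A, 3, NULL, 2, NULL));
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif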
4145 
4146 /*@
4147      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local
4148          rows in standard CSR format.
4149 
4150    Collective
4151 
4152    Input Parameters:
4153 +  comm - MPI communicator
4154 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4155 .  n - This value should be the same as the local size used in creating the
4156        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4157        calculated if N is given) For square matrices n is almost always m.
4158 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4159 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4160 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4161 .   j - column indices
4162 -   a - optional matrix values
4163 
4164    Output Parameter:
4165 .   mat - the matrix
4166 
4167    Level: intermediate
4168 
4169    Notes:
4170        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
4171      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4172      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
4173 
4174        The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
4175 
4176        Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`.
4177 
4178        The format used for the sparse matrix input is equivalent to a
4179     row-major ordering, i.e. for the following matrix, the input data expected is
4180     as shown
4181 .vb
4182         1 0 0
4183         2 0 3     P0
4184        -------
4185         4 5 6     P1
4186 
4187      Process0 [P0] rows_owned=[0,1]
4188         i =  {0,1,3}  [size = nrow+1  = 2+1]
4189         j =  {0,0,2}  [size = 3]
4190         v =  {1,2,3}  [size = 3]
4191 
4192      Process1 [P1] rows_owned=[2]
4193         i =  {0,3}    [size = nrow+1  = 1+1]
4194         j =  {0,1,2}  [size = 3]
4195         v =  {4,5,6}  [size = 3]
4196 .ve
4197 
4198 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4199           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4200 @*/
4201 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
4202 {
4203   PetscFunctionBegin;
4204   PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4205   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4206   PetscCall(MatCreate(comm, mat));
4207   PetscCall(MatSetSizes(*mat, m, n, M, N));
4208   /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
4209   PetscCall(MatSetType(*mat, MATMPIAIJ));
4210   PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
4211   PetscFunctionReturn(PETSC_SUCCESS);
4212 }
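
/*
  Illustrative sketch (hypothetical, kept out of the build): one-call creation of the 3x3
  matrix from the manual page above, assuming exactly two ranks in the communicator.
*/
#if 0
static PetscErrorCode ExampleCreateFromCSR(MPI_Comm comm, Mat *A)
{
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  if (rank == 0) { /* rows 0 and 1 */
    const PetscInt    i[] = {0, 1, 3}, j[] = {0, 0, 2};
    const PetscScalar v[] = {1, 2, 3};
    PetscCall(MatCreateMPIAIJWithArrays(comm, 2, PETSC_DECIDE, 3, 3, i, j, v, A));
  } else { /* row 2 */
    const PetscInt    i[] = {0, 3}, j[] = {0, 1, 2};
    const PetscScalar v[] = {4, 5, 6};
    PetscCall(MatCreateMPIAIJWithArrays(comm, 1, PETSC_DECIDE, 3, 3, i, j, v, A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif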
4213 
4214 /*@
4215      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local
4216      rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed
4217      to `MatCreateMPIAIJWithArrays()`
4218 
4219      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4220 
4221    Collective
4222 
4223    Input Parameters:
4224 +  mat - the matrix
4225 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4226 .  n - This value should be the same as the local size used in creating the
4227        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4228        calculated if N is given) For square matrices n is almost always m.
4229 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4230 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4231 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4232 .  J - column indices
4233 -  v - matrix values
4234 
4235    Level: deprecated
4236 
4237 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4238           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4239 @*/
4240 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
4241 {
4242   PetscInt        nnz, i;
4243   PetscBool       nooffprocentries;
4244   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4245   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4246   PetscScalar    *ad, *ao;
4247   PetscInt        ldi, Iii, md;
4248   const PetscInt *Adi = Ad->i;
4249   PetscInt       *ld  = Aij->ld;
4250 
4251   PetscFunctionBegin;
4252   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
4253   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
4254   PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4255   PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4256 
4257   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4258   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4259 
4260   for (i = 0; i < m; i++) {
4261     nnz = Ii[i + 1] - Ii[i];
4262     Iii = Ii[i];
4263     ldi = ld[i];
4264     md  = Adi[i + 1] - Adi[i];
4265     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4266     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4267     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4268     ad += md;
4269     ao += nnz - md;
4270   }
4271   nooffprocentries      = mat->nooffprocentries;
4272   mat->nooffprocentries = PETSC_TRUE;
4273   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4274   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4275   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4276   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4277   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4278   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4279   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4280   mat->nooffprocentries = nooffprocentries;
4281   PetscFunctionReturn(PETSC_SUCCESS);
4282 }
4283 
4284 /*@
4285      MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the nonzero values
4286 
4287    Collective
4288 
4289    Input Parameters:
4290 +  mat - the matrix
4291 -  v - matrix values, stored by row
4292 
4293    Level: intermediate
4294 
4295    Note:
4296    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4297 
4298 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4299           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4300 @*/
4301 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4302 {
4303   PetscInt        nnz, i, m;
4304   PetscBool       nooffprocentries;
4305   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4306   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4307   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4308   PetscScalar    *ad, *ao;
4309   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4310   PetscInt        ldi, Iii, md;
4311   PetscInt       *ld = Aij->ld;
4312 
4313   PetscFunctionBegin;
4314   m = mat->rmap->n;
4315 
4316   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4317   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4318   Iii = 0;
4319   for (i = 0; i < m; i++) {
4320     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4321     ldi = ld[i];
4322     md  = Adi[i + 1] - Adi[i];
4323     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4324     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4325     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4326     ad += md;
4327     ao += nnz - md;
4328     Iii += nnz;
4329   }
4330   nooffprocentries      = mat->nooffprocentries;
4331   mat->nooffprocentries = PETSC_TRUE;
4332   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4333   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4334   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4335   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4336   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4337   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4338   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4339   mat->nooffprocentries = nooffprocentries;
4340   PetscFunctionReturn(PETSC_SUCCESS);
4341 }
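
/*
  Illustrative sketch (hypothetical, kept out of the build): refreshing only the numerical
  values of a matrix whose pattern was fixed by MatCreateMPIAIJWithArrays(); nlocalnz and v
  are assumptions of this example and must match the original local CSR ordering.
*/
#if 0
static PetscErrorCode ExampleRefreshValues(Mat A, PetscInt nsteps, PetscInt nlocalnz, PetscScalar v[])
{
  PetscInt step, k;

  PetscFunctionBegin;
  for (step = 0; step < nsteps; step++) {
    for (k = 0; k < nlocalnz; k++) v[k] *= 2.0; /* recompute the local values in place */
    PetscCall(MatUpdateMPIAIJWithArray(A, v));  /* same sparsity pattern, new values */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif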
4342 
4343 /*@C
4344    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4345    (the default parallel PETSc format).  For good matrix assembly performance
4346    the user should preallocate the matrix storage by setting the parameters
4347    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
4348 
4349    Collective
4350 
4351    Input Parameters:
4352 +  comm - MPI communicator
4353 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4354            This value should be the same as the local size used in creating the
4355            y vector for the matrix-vector product y = Ax.
4356 .  n - This value should be the same as the local size used in creating the
4357        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4358        calculated if N is given) For square matrices n is almost always m.
4359 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4360 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4361 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4362            (same value is used for all local rows)
4363 .  d_nnz - array containing the number of nonzeros in the various rows of the
4364            DIAGONAL portion of the local submatrix (possibly different for each row)
4365            or `NULL`, if `d_nz` is used to specify the nonzero structure.
4366            The size of this array is equal to the number of local rows, i.e 'm'.
4367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4368            submatrix (same value is used for all local rows).
4369 -  o_nnz - array containing the number of nonzeros in the various rows of the
4370            OFF-DIAGONAL portion of the local submatrix (possibly different for
4371            each row) or `NULL`, if `o_nz` is used to specify the nonzero
4372            structure. The size of this array is equal to the number
4373            of local rows, i.e 'm'.
4374 
4375    Output Parameter:
4376 .  A - the matrix
4377 
4378    Options Database Keys:
4379 +  -mat_no_inode  - Do not use inodes
4380 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4381 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4382         See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix.
4383         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4384 
4385    Level: intermediate
4386 
4387    Notes:
4388    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4389    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4390    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4391 
4392    If the *_nnz parameter is given then the *_nz parameter is ignored
4393 
4394    The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4395    processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4396    storage requirements for this matrix.
4397 
4398    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
4399    processor then it must be used on all processors that share the object for
4400    that argument.
4401 
4402    The user MUST specify either the local or global matrix dimensions
4403    (possibly both).
4404 
4405    The parallel matrix is partitioned across processors such that the
4406    first m0 rows belong to process 0, the next m1 rows belong to
4407    process 1, the next m2 rows belong to process 2, etc., where
4408    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4409    values corresponding to an [m x N] submatrix.
4410 
4411    The columns are logically partitioned with the n0 columns belonging
4412    to 0th partition, the next n1 columns belonging to the next
4413    to the 0th partition, the next n1 columns belonging to the next
4414    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4415    The DIAGONAL portion of the local submatrix on any given processor
4416    is the submatrix corresponding to the rows and columns m,n
4417    owned by the given processor, i.e. the diagonal matrix on
4418    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4419    etc. The remaining portion of the local submatrix [m x (N-n)]
4420    constitutes the OFF-DIAGONAL portion. The example below better
4421    illustrates this concept.
4422 
4423    For a square global matrix we define each processor's diagonal portion
4424    to be its local rows and the corresponding columns (a square submatrix);
4425    each processor's off-diagonal portion encompasses the remainder of the
4426    local matrix (a rectangular submatrix).
4427 
4428    If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4429 
4430    When calling this routine with a single process communicator, a matrix of
4431    type `MATSEQAIJ` is returned.  If a matrix of type `MATMPIAIJ` is desired for this
4432    type of communicator, use the construction mechanism
4433 .vb
4434      MatCreate(...,&A);
4435      MatSetType(A,MATMPIAIJ);
4436      MatSetSizes(A, m,n,M,N);
4437      MatMPIAIJSetPreallocation(A,...);
4438 .ve
4439 
4440    By default, this format uses inodes (identical nodes) when possible.
4441    We search for consecutive rows with the same nonzero structure, thereby
4442    reusing matrix information to achieve increased efficiency.
4443 
4444    Usage:
4445    Consider the following 8x8 matrix with 34 non-zero values that is
4446    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4447    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4448    as follows
4449 
4450 .vb
4451             1  2  0  |  0  3  0  |  0  4
4452     Proc0   0  5  6  |  7  0  0  |  8  0
4453             9  0 10  | 11  0  0  | 12  0
4454     -------------------------------------
4455            13  0 14  | 15 16 17  |  0  0
4456     Proc1   0 18  0  | 19 20 21  |  0  0
4457             0  0  0  | 22 23  0  | 24  0
4458     -------------------------------------
4459     Proc2  25 26 27  |  0  0 28  | 29  0
4460            30  0  0  | 31 32 33  |  0 34
4461 .ve
4462 
4463    This can be represented as a collection of submatrices as
4464 
4465 .vb
4466       A B C
4467       D E F
4468       G H I
4469 .ve
4470 
4471    Where the submatrices A,B,C are owned by proc0, D,E,F are
4472    owned by proc1, G,H,I are owned by proc2.
4473 
4474    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4475    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4476    The 'M','N' parameters are 8,8, and have the same values on all procs.
4477 
4478    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4479    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4480    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4481    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4482    part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4483    matrix, and [DF] as another `MATSEQAIJ` matrix.
4484 
4485    When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4486    allocated for every row of the local diagonal submatrix, and `o_nz`
4487    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4488    One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
4489    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4490    In this case, the values of `d_nz`,`o_nz` are
4491 .vb
4492      proc0  dnz = 2, o_nz = 2
4493      proc1  dnz = 3, o_nz = 2
4494      proc2  dnz = 1, o_nz = 4
4495 .ve
4496    We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4497    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4498    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4499    34 values.
4500 
4501    When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4502    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4503    In the above case the values for `d_nnz`, `o_nnz` are
4504 .vb
4505      proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
4506      proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
4507      proc2 d_nnz = [1,1]   and o_nnz = [4,4]
4508 .ve
4509    Here the space allocated is sum of all the above values i.e 34, and
4510    hence pre-allocation is perfect.
4511 
4512 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4513           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4514 @*/
4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4516 {
4517   PetscMPIInt size;
4518 
4519   PetscFunctionBegin;
4520   PetscCall(MatCreate(comm, A));
4521   PetscCall(MatSetSizes(*A, m, n, M, N));
4522   PetscCallMPI(MPI_Comm_size(comm, &size));
4523   if (size > 1) {
4524     PetscCall(MatSetType(*A, MATMPIAIJ));
4525     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4526   } else {
4527     PetscCall(MatSetType(*A, MATSEQAIJ));
4528     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4529   }
4530   PetscFunctionReturn(PETSC_SUCCESS);
4531 }
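
/*
  Illustrative sketch (hypothetical sizes, kept out of the build): creating a square AIJ
  matrix directly with MatCreateAIJ(), preallocating 5 diagonal-block and 2 off-diagonal-block
  nonzeros per row; see the Notes above for the preferred MatCreate()/MatSetType() paradigm.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm, PetscInt mlocal, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreateAIJ(comm, mlocal, mlocal, PETSC_DETERMINE, PETSC_DETERMINE, 5, NULL, 2, NULL, A));
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif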
4532 
4533 /*MC
4534     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4535 
4536     Synopsis:
4537     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4538 
4539     Not Collective
4540 
4541     Input Parameter:
4542 .   A - the `MATMPIAIJ` matrix
4543 
4544     Output Parameters:
4545 +   Ad - the diagonal portion of the matrix
4546 .   Ao - the off diagonal portion of the matrix
4547 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4548 -   ierr - error code
4549 
4550      Level: advanced
4551 
4552     Note:
4553     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4554 
4555 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4556 M*/
4557 
4558 /*MC
4559     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4560 
4561     Synopsis:
4562     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4563 
4564     Not Collective
4565 
4566     Input Parameters:
4567 +   A - the `MATMPIAIJ` matrix
4568 .   Ad - the diagonal portion of the matrix
4569 .   Ao - the off diagonal portion of the matrix
4570 .   colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4571 -   ierr - error code
4572 
4573      Level: advanced
4574 
4575 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4576 M*/
4577 
4578 /*@C
4579   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4580 
4581   Not Collective
4582 
4583   Input Parameter:
4584 . A - The `MATMPIAIJ` matrix
4585 
4586   Output Parameters:
4587 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4588 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4589 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
4590 
4591   Level: intermediate
4592 
4593   Note:
4594   The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
4595   in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
4596   the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
4597   local column numbers to global column numbers in the original matrix.
4598 
4599   Fortran Note:
4600   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4601 
4602 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4603 @*/
4604 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4605 {
4606   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4607   PetscBool   flg;
4608 
4609   PetscFunctionBegin;
4610   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4611   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4612   if (Ad) *Ad = a->A;
4613   if (Ao) *Ao = a->B;
4614   if (colmap) *colmap = a->garray;
4615   PetscFunctionReturn(PETSC_SUCCESS);
4616 }
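
/*
  Illustrative sketch (hypothetical, kept out of the build): using colmap to translate the
  local column numbers of the off-diagonal block back to global column numbers.
*/
#if 0
static PetscErrorCode ExampleInspectOffDiagonal(Mat A)
{
  Mat             Ao;
  const PetscInt *colmap, *cols;
  PetscInt        mloc, ncols, j;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, NULL, &Ao, &colmap));
  PetscCall(MatGetLocalSize(Ao, &mloc, NULL));
  if (mloc) { /* look at the first local row, if there is one */
    PetscCall(MatGetRow(Ao, 0, &ncols, &cols, NULL));
    for (j = 0; j < ncols; j++) PetscCall(PetscPrintf(PETSC_COMM_SELF, "off-diagonal entry in global column %" PetscInt_FMT "\n", colmap[cols[j]]));
    PetscCall(MatRestoreRow(Ao, 0, &ncols, &cols, NULL));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif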
4617 
4618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
4619 {
4620   PetscInt     m, N, i, rstart, nnz, Ii;
4621   PetscInt    *indx;
4622   PetscScalar *values;
4623   MatType      rootType;
4624 
4625   PetscFunctionBegin;
4626   PetscCall(MatGetSize(inmat, &m, &N));
4627   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4628     PetscInt *dnz, *onz, sum, bs, cbs;
4629 
4630     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
4631     /* Check sum(n) = N */
4632     PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
4633     PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);
4634 
4635     PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
4636     rstart -= m;
4637 
4638     MatPreallocateBegin(comm, m, n, dnz, onz);
4639     for (i = 0; i < m; i++) {
4640       PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4641       PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
4642       PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
4643     }
4644 
4645     PetscCall(MatCreate(comm, outmat));
4646     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
4647     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4648     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
4649     PetscCall(MatGetRootType_Private(inmat, &rootType));
4650     PetscCall(MatSetType(*outmat, rootType));
4651     PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
4652     PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
4653     MatPreallocateEnd(dnz, onz);
4654     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
4655   }
4656 
4657   /* numeric phase */
4658   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
4659   for (i = 0; i < m; i++) {
4660     PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4661     Ii = i + rstart;
4662     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
4663     PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
4664   }
4665   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
4666   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
4667   PetscFunctionReturn(PETSC_SUCCESS);
4668 }
4669 
4670 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4671 {
4672   PetscMPIInt        rank;
4673   PetscInt           m, N, i, rstart, nnz;
4674   size_t             len;
4675   const PetscInt    *indx;
4676   PetscViewer        out;
4677   char              *name;
4678   Mat                B;
4679   const PetscScalar *values;
4680 
4681   PetscFunctionBegin;
4682   PetscCall(MatGetLocalSize(A, &m, NULL));
4683   PetscCall(MatGetSize(A, NULL, &N));
4684   /* Should this be the type of the diagonal block of A? */
4685   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4686   PetscCall(MatSetSizes(B, m, N, m, N));
4687   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4688   PetscCall(MatSetType(B, MATSEQAIJ));
4689   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4690   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4691   for (i = 0; i < m; i++) {
4692     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4693     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4694     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4695   }
4696   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4697   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4698 
4699   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4700   PetscCall(PetscStrlen(outfile, &len));
4701   PetscCall(PetscMalloc1(len + 6, &name));
4702   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4703   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4704   PetscCall(PetscFree(name));
4705   PetscCall(MatView(B, out));
4706   PetscCall(PetscViewerDestroy(&out));
4707   PetscCall(MatDestroy(&B));
4708   PetscFunctionReturn(PETSC_SUCCESS);
4709 }
4710 
4711 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4712 {
4713   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4714 
4715   PetscFunctionBegin;
4716   if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
4717   PetscCall(PetscFree(merge->id_r));
4718   PetscCall(PetscFree(merge->len_s));
4719   PetscCall(PetscFree(merge->len_r));
4720   PetscCall(PetscFree(merge->bi));
4721   PetscCall(PetscFree(merge->bj));
4722   PetscCall(PetscFree(merge->buf_ri[0]));
4723   PetscCall(PetscFree(merge->buf_ri));
4724   PetscCall(PetscFree(merge->buf_rj[0]));
4725   PetscCall(PetscFree(merge->buf_rj));
4726   PetscCall(PetscFree(merge->coi));
4727   PetscCall(PetscFree(merge->coj));
4728   PetscCall(PetscFree(merge->owners_co));
4729   PetscCall(PetscLayoutDestroy(&merge->rowmap));
4730   PetscCall(PetscFree(merge));
4731   PetscFunctionReturn(PETSC_SUCCESS);
4732 }
4733 
4734 #include <../src/mat/utils/freespace.h>
4735 #include <petscbt.h>
4736 
4737 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
4738 {
4739   MPI_Comm             comm;
4740   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4741   PetscMPIInt          size, rank, taga, *len_s;
4742   PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
4743   PetscInt             proc, m;
4744   PetscInt           **buf_ri, **buf_rj;
4745   PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
4746   PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
4747   MPI_Request         *s_waits, *r_waits;
4748   MPI_Status          *status;
4749   const MatScalar     *aa, *a_a;
4750   MatScalar          **abuf_r, *ba_i;
4751   Mat_Merge_SeqsToMPI *merge;
4752   PetscContainer       container;
4753 
4754   PetscFunctionBegin;
4755   PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
4756   PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));
4757 
4758   PetscCallMPI(MPI_Comm_size(comm, &size));
4759   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4760 
4761   PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
4762   PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4763   PetscCall(PetscContainerGetPointer(container, (void **)&merge));
4764   PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
4765   aa = a_a;
4766 
4767   bi     = merge->bi;
4768   bj     = merge->bj;
4769   buf_ri = merge->buf_ri;
4770   buf_rj = merge->buf_rj;
4771 
4772   PetscCall(PetscMalloc1(size, &status));
4773   owners = merge->rowmap->range;
4774   len_s  = merge->len_s;
4775 
4776   /* send and recv matrix values */
4777   PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
4778   PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));
4779 
4780   PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
4781   for (proc = 0, k = 0; proc < size; proc++) {
4782     if (!len_s[proc]) continue;
4783     i = owners[proc];
4784     PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
4785     k++;
4786   }
4787 
4788   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
4789   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
4790   PetscCall(PetscFree(status));
4791 
4792   PetscCall(PetscFree(s_waits));
4793   PetscCall(PetscFree(r_waits));
4794 
4795   /* insert mat values of mpimat */
4796   PetscCall(PetscMalloc1(N, &ba_i));
4797   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
4798 
4799   for (k = 0; k < merge->nrecv; k++) {
4800     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4801     nrows       = *(buf_ri_k[k]);
4802     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
4803     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4804   }
4805 
4806   /* set values of ba */
4807   m = merge->rowmap->n;
4808   for (i = 0; i < m; i++) {
4809     arow = owners[rank] + i;
4810     bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
4811     bnzi = bi[i + 1] - bi[i];
4812     PetscCall(PetscArrayzero(ba_i, bnzi));
4813 
4814     /* add local non-zero vals of this proc's seqmat into ba */
4815     anzi   = ai[arow + 1] - ai[arow];
4816     aj     = a->j + ai[arow];
4817     aa     = a_a + ai[arow];
4818     nextaj = 0;
4819     for (j = 0; nextaj < anzi; j++) {
4820       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4821         ba_i[j] += aa[nextaj++];
4822       }
4823     }
4824 
4825     /* add received vals into ba */
4826     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
4827       /* i-th row */
4828       if (i == *nextrow[k]) {
4829         anzi   = *(nextai[k] + 1) - *nextai[k];
4830         aj     = buf_rj[k] + *(nextai[k]);
4831         aa     = abuf_r[k] + *(nextai[k]);
4832         nextaj = 0;
4833         for (j = 0; nextaj < anzi; j++) {
4834           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4835             ba_i[j] += aa[nextaj++];
4836           }
4837         }
4838         nextrow[k]++;
4839         nextai[k]++;
4840       }
4841     }
4842     PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
4843   }
4844   PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
4845   PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
4846   PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));
4847 
4848   PetscCall(PetscFree(abuf_r[0]));
4849   PetscCall(PetscFree(abuf_r));
4850   PetscCall(PetscFree(ba_i));
4851   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
4852   PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
4853   PetscFunctionReturn(PETSC_SUCCESS);
4854 }
4855 
4856 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4857 {
4858   Mat                  B_mpi;
4859   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4860   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4861   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4862   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4863   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4864   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4865   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4866   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4867   MPI_Status          *status;
4868   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4869   PetscBT              lnkbt;
4870   Mat_Merge_SeqsToMPI *merge;
4871   PetscContainer       container;
4872 
4873   PetscFunctionBegin;
4874   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4875 
4876   /* make sure it is a PETSc comm */
4877   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4878   PetscCallMPI(MPI_Comm_size(comm, &size));
4879   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4880 
4881   PetscCall(PetscNew(&merge));
4882   PetscCall(PetscMalloc1(size, &status));
4883 
4884   /* determine row ownership */
4885   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4886   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4887   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4888   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4889   PetscCall(PetscLayoutSetUp(merge->rowmap));
4890   PetscCall(PetscMalloc1(size, &len_si));
4891   PetscCall(PetscMalloc1(size, &merge->len_s));
4892 
4893   m      = merge->rowmap->n;
4894   owners = merge->rowmap->range;
4895 
4896   /* determine the number of messages to send, their lengths */
4897   len_s = merge->len_s;
4898 
4899   len          = 0; /* length of buf_si[] */
4900   merge->nsend = 0;
4901   for (proc = 0; proc < size; proc++) {
4902     len_si[proc] = 0;
4903     if (proc == rank) {
4904       len_s[proc] = 0;
4905     } else {
4906       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4907       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4908     }
4909     if (len_s[proc]) {
4910       merge->nsend++;
4911       nrows = 0;
4912       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4913         if (ai[i + 1] > ai[i]) nrows++;
4914       }
4915       len_si[proc] = 2 * (nrows + 1);
4916       len += len_si[proc];
4917     }
4918   }
4919 
4920   /* determine the number and length of messages to receive for ij-structure */
4921   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4922   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4923 
4924   /* post the Irecv of j-structure */
4925   PetscCall(PetscCommGetNewTag(comm, &tagj));
4926   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4927 
4928   /* post the Isend of j-structure */
4929   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4930 
4931   for (proc = 0, k = 0; proc < size; proc++) {
4932     if (!len_s[proc]) continue;
4933     i = owners[proc];
4934     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4935     k++;
4936   }
4937 
4938   /* receives and sends of j-structure are complete */
4939   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4940   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4941 
4942   /* send and recv i-structure */
4943   PetscCall(PetscCommGetNewTag(comm, &tagi));
4944   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4945 
4946   PetscCall(PetscMalloc1(len + 1, &buf_s));
4947   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4948   for (proc = 0, k = 0; proc < size; proc++) {
4949     if (!len_s[proc]) continue;
4950     /* form outgoing message for i-structure:
4951          buf_si[0]:                 nrows to be sent
4952                [1:nrows]:           row index (global)
4953                [nrows+1:2*nrows+1]: i-structure index
4954     */
4955     nrows       = len_si[proc] / 2 - 1;
4956     buf_si_i    = buf_si + nrows + 1;
4957     buf_si[0]   = nrows;
4958     buf_si_i[0] = 0;
4959     nrows       = 0;
4960     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4961       anzi = ai[i + 1] - ai[i];
4962       if (anzi) {
4963         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4964         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4965         nrows++;
4966       }
4967     }
4968     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4969     k++;
4970     buf_si += len_si[proc];
4971   }
4972 
4973   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4974   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4975 
4976   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4977   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4978 
4979   PetscCall(PetscFree(len_si));
4980   PetscCall(PetscFree(len_ri));
4981   PetscCall(PetscFree(rj_waits));
4982   PetscCall(PetscFree2(si_waits, sj_waits));
4983   PetscCall(PetscFree(ri_waits));
4984   PetscCall(PetscFree(buf_s));
4985   PetscCall(PetscFree(status));
4986 
4987   /* compute a local seq matrix in each processor */
4988   /* allocate bi array and free space for accumulating nonzero column info */
4989   PetscCall(PetscMalloc1(m + 1, &bi));
4990   bi[0] = 0;
4991 
4992   /* create and initialize a linked list */
4993   nlnk = N + 1;
4994   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
4995 
4996   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4997   len = ai[owners[rank + 1]] - ai[owners[rank]];
4998   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
4999 
5000   current_space = free_space;
5001 
5002   /* determine symbolic info for each local row */
5003   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5004 
5005   for (k = 0; k < merge->nrecv; k++) {
5006     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5007     nrows       = *buf_ri_k[k];
5008     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5009     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5010   }
5011 
5012   MatPreallocateBegin(comm, m, n, dnz, onz);
5013   len = 0;
5014   for (i = 0; i < m; i++) {
5015     bnzi = 0;
5016     /* add local non-zero cols of this proc's seqmat into lnk */
5017     arow = owners[rank] + i;
5018     anzi = ai[arow + 1] - ai[arow];
5019     aj   = a->j + ai[arow];
5020     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5021     bnzi += nlnk;
5022     /* add received col data into lnk */
5023     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5024       if (i == *nextrow[k]) {            /* i-th row */
5025         anzi = *(nextai[k] + 1) - *nextai[k];
5026         aj   = buf_rj[k] + *nextai[k];
5027         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5028         bnzi += nlnk;
5029         nextrow[k]++;
5030         nextai[k]++;
5031       }
5032     }
5033     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5034 
5035     /* if free space is not available, make more free space */
5036     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5037     /* copy data into free space, then initialize lnk */
5038     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5039     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5040 
5041     current_space->array += bnzi;
5042     current_space->local_used += bnzi;
5043     current_space->local_remaining -= bnzi;
5044 
5045     bi[i + 1] = bi[i] + bnzi;
5046   }
5047 
5048   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5049 
5050   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5051   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5052   PetscCall(PetscLLDestroy(lnk, lnkbt));
5053 
5054   /* create symbolic parallel matrix B_mpi */
5055   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5056   PetscCall(MatCreate(comm, &B_mpi));
5057   if (n == PETSC_DECIDE) {
5058     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5059   } else {
5060     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5061   }
5062   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5063   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5064   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5065   MatPreallocateEnd(dnz, onz);
5066   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5067 
5068   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5069   B_mpi->assembled = PETSC_FALSE;
5070   merge->bi        = bi;
5071   merge->bj        = bj;
5072   merge->buf_ri    = buf_ri;
5073   merge->buf_rj    = buf_rj;
5074   merge->coi       = NULL;
5075   merge->coj       = NULL;
5076   merge->owners_co = NULL;
5077 
5078   PetscCall(PetscCommDestroy(&comm));
5079 
5080   /* attach the supporting struct to B_mpi for reuse */
5081   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5082   PetscCall(PetscContainerSetPointer(container, merge));
5083   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5084   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5085   PetscCall(PetscContainerDestroy(&container));
5086   *mpimat = B_mpi;
5087 
5088   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5089   PetscFunctionReturn(PETSC_SUCCESS);
5090 }
5091 
5092 /*@C
5093       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5094                  matrices from each processor
5095 
5096     Collective
5097 
5098    Input Parameters:
5099 +    comm - the communicator the parallel matrix will live on
5100 .    seqmat - the input sequential matrix
5101 .    m - number of local rows (or `PETSC_DECIDE`)
5102 .    n - number of local columns (or `PETSC_DECIDE`)
5103 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5104 
5105    Output Parameter:
5106 .    mpimat - the parallel matrix generated
5107 
5108     Level: advanced
5109 
5110    Note:
5111      The dimensions of the sequential matrix on each process MUST be the same.
5112      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5113      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5114 
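   Example Usage:
   A minimal calling sketch (error handling and the assembly of the per-process matrix omitted; `Aseq` is a placeholder name for the local `MATSEQAIJ`, not part of this API):
.vb
   Mat Aseq; /* a MATSEQAIJ built by this process, with the same dimensions on every process */
   Mat C;
   PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C));
   /* after only the values (not the nonzero pattern) of Aseq change, the sum can be recomputed in place */
   PetscCall(MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD, Aseq, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C));
   PetscCall(MatDestroy(&C));
.ve
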
5115 .seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()`
5116 @*/
5117 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5118 {
5119   PetscMPIInt size;
5120 
5121   PetscFunctionBegin;
5122   PetscCallMPI(MPI_Comm_size(comm, &size));
5123   if (size == 1) {
5124     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5125     if (scall == MAT_INITIAL_MATRIX) {
5126       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5127     } else {
5128       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5129     }
5130     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5131     PetscFunctionReturn(PETSC_SUCCESS);
5132   }
5133   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5134   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5135   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5136   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5137   PetscFunctionReturn(PETSC_SUCCESS);
5138 }
5139 
5140 /*@
5141      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking its local rows and putting them into a sequential matrix with
5142           mlocal rows and n columns, where mlocal is obtained with `MatGetLocalSize()` and n is the global column count obtained
5143           with `MatGetSize()`
5144 
5145     Not Collective
5146 
5147    Input Parameter:
5148 .    A - the matrix
5149 
5150    Output Parameter:
5151 .    A_loc - the local sequential matrix generated
5152 
5153     Level: developer
5154 
5155    Notes:
5156      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5157 
5158      Destroy the matrix with `MatDestroy()`
5159 
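   Example Usage:
   A minimal sketch (error handling omitted):
.vb
   Mat Aloc;
   PetscCall(MatAIJGetLocalMat(A, &Aloc));
   /* ... use the sequential matrix Aloc ... */
   PetscCall(MatDestroy(&Aloc));
.ve
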
5160 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5161 @*/
5162 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5163 {
5164   PetscBool mpi;
5165 
5166   PetscFunctionBegin;
5167   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5168   if (mpi) {
5169     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5170   } else {
5171     *A_loc = A;
5172     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5173   }
5174   PetscFunctionReturn(PETSC_SUCCESS);
5175 }
5176 
5177 /*@
5178      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5179           mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5180           with `MatGetSize()`
5181 
5182     Not Collective
5183 
5184    Input Parameters:
5185 +    A - the matrix
5186 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5187 
5188    Output Parameter:
5189 .    A_loc - the local sequential matrix generated
5190 
5191     Level: developer
5192 
5193    Notes:
5194      In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5195 
5196      When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`.
5197      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called.
5198      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5199      modify the values of the returned `A_loc`.
5200 
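   Example Usage:
   A minimal sketch (error handling omitted):
.vb
   Mat Aloc;
   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
   /* ... after the values (but not the nonzero pattern) of A change ... */
   PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc));
   PetscCall(MatDestroy(&Aloc));
.ve
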
5201 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5202 @*/
5203 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
5204 {
5205   Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
5206   Mat_SeqAIJ        *mat, *a, *b;
5207   PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
5208   const PetscScalar *aa, *ba, *aav, *bav;
5209   PetscScalar       *ca, *cam;
5210   PetscMPIInt        size;
5211   PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
5212   PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
5213   PetscBool          match;
5214 
5215   PetscFunctionBegin;
5216   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
5217   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5218   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5219   if (size == 1) {
5220     if (scall == MAT_INITIAL_MATRIX) {
5221       PetscCall(PetscObjectReference((PetscObject)mpimat->A));
5222       *A_loc = mpimat->A;
5223     } else if (scall == MAT_REUSE_MATRIX) {
5224       PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
5225     }
5226     PetscFunctionReturn(PETSC_SUCCESS);
5227   }
5228 
5229   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5230   a  = (Mat_SeqAIJ *)(mpimat->A)->data;
5231   b  = (Mat_SeqAIJ *)(mpimat->B)->data;
5232   ai = a->i;
5233   aj = a->j;
5234   bi = b->i;
5235   bj = b->j;
5236   PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
5237   PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
5238   aa = aav;
5239   ba = bav;
5240   if (scall == MAT_INITIAL_MATRIX) {
5241     PetscCall(PetscMalloc1(1 + am, &ci));
5242     ci[0] = 0;
5243     for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
5244     PetscCall(PetscMalloc1(1 + ci[am], &cj));
5245     PetscCall(PetscMalloc1(1 + ci[am], &ca));
5246     k = 0;
5247     for (i = 0; i < am; i++) {
5248       ncols_o = bi[i + 1] - bi[i];
5249       ncols_d = ai[i + 1] - ai[i];
5250       /* off-diagonal portion of A */
5251       for (jo = 0; jo < ncols_o; jo++) {
5252         col = cmap[*bj];
5253         if (col >= cstart) break;
5254         cj[k] = col;
5255         bj++;
5256         ca[k++] = *ba++;
5257       }
5258       /* diagonal portion of A */
5259       for (j = 0; j < ncols_d; j++) {
5260         cj[k]   = cstart + *aj++;
5261         ca[k++] = *aa++;
5262       }
5263       /* off-diagonal portion of A */
5264       for (j = jo; j < ncols_o; j++) {
5265         cj[k]   = cmap[*bj++];
5266         ca[k++] = *ba++;
5267       }
5268     }
5269     /* put together the new matrix */
5270     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
5271     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5272     /* Since these are PETSc arrays, change flags to free them as necessary. */
5273     mat          = (Mat_SeqAIJ *)(*A_loc)->data;
5274     mat->free_a  = PETSC_TRUE;
5275     mat->free_ij = PETSC_TRUE;
5276     mat->nonew   = 0;
5277   } else if (scall == MAT_REUSE_MATRIX) {
5278     mat = (Mat_SeqAIJ *)(*A_loc)->data;
5279     ci  = mat->i;
5280     cj  = mat->j;
5281     PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
5282     for (i = 0; i < am; i++) {
5283       /* off-diagonal portion of A */
5284       ncols_o = bi[i + 1] - bi[i];
5285       for (jo = 0; jo < ncols_o; jo++) {
5286         col = cmap[*bj];
5287         if (col >= cstart) break;
5288         *cam++ = *ba++;
5289         bj++;
5290       }
5291       /* diagonal portion of A */
5292       ncols_d = ai[i + 1] - ai[i];
5293       for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
5294       /* off-diagonal portion of A */
5295       for (j = jo; j < ncols_o; j++) {
5296         *cam++ = *ba++;
5297         bj++;
5298       }
5299     }
5300     PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
5301   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5302   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
5303   PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
5304   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5305   PetscFunctionReturn(PETSC_SUCCESS);
5306 }
5307 
5308 /*@
5309      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5310           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5311 
5312     Not Collective
5313 
5314    Input Parameters:
5315 +    A - the matrix
5316 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5317 
5318    Output Parameters:
5319 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`)
5320 -    A_loc - the local sequential matrix generated
5321 
5322     Level: developer
5323 
5324    Note:
5325      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal
5326      part, followed by those associated with the off-diagonal part (in its local ordering)
5327 
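   Example Usage:
   A minimal sketch (error handling omitted); `glob` may be passed as `NULL` if the global column indices are not needed:
.vb
   IS  glob;
   Mat Aloc;
   PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &Aloc));
   /* columns of Aloc: diagonal-part columns first, then off-diagonal ones; glob gives their global indices */
   PetscCall(ISDestroy(&glob));
   PetscCall(MatDestroy(&Aloc));
.ve
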
5328 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5329 @*/
5330 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5331 {
5332   Mat             Ao, Ad;
5333   const PetscInt *cmap;
5334   PetscMPIInt     size;
5335   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5336 
5337   PetscFunctionBegin;
5338   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5339   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5340   if (size == 1) {
5341     if (scall == MAT_INITIAL_MATRIX) {
5342       PetscCall(PetscObjectReference((PetscObject)Ad));
5343       *A_loc = Ad;
5344     } else if (scall == MAT_REUSE_MATRIX) {
5345       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5346     }
5347     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5348     PetscFunctionReturn(PETSC_SUCCESS);
5349   }
5350   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5351   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5352   if (f) {
5353     PetscCall((*f)(A, scall, glob, A_loc));
5354   } else {
5355     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5356     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5357     Mat_SeqAIJ        *c;
5358     PetscInt          *ai = a->i, *aj = a->j;
5359     PetscInt          *bi = b->i, *bj = b->j;
5360     PetscInt          *ci, *cj;
5361     const PetscScalar *aa, *ba;
5362     PetscScalar       *ca;
5363     PetscInt           i, j, am, dn, on;
5364 
5365     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5366     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5367     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5368     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5369     if (scall == MAT_INITIAL_MATRIX) {
5370       PetscInt k;
5371       PetscCall(PetscMalloc1(1 + am, &ci));
5372       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5373       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5374       ci[0] = 0;
5375       for (i = 0, k = 0; i < am; i++) {
5376         const PetscInt ncols_o = bi[i + 1] - bi[i];
5377         const PetscInt ncols_d = ai[i + 1] - ai[i];
5378         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5379         /* diagonal portion of A */
5380         for (j = 0; j < ncols_d; j++, k++) {
5381           cj[k] = *aj++;
5382           ca[k] = *aa++;
5383         }
5384         /* off-diagonal portion of A */
5385         for (j = 0; j < ncols_o; j++, k++) {
5386           cj[k] = dn + *bj++;
5387           ca[k] = *ba++;
5388         }
5389       }
5390       /* put together the new matrix */
5391       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5392       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5393       /* Since these are PETSc arrays, change flags to free them as necessary. */
5394       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5395       c->free_a  = PETSC_TRUE;
5396       c->free_ij = PETSC_TRUE;
5397       c->nonew   = 0;
5398       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5399     } else if (scall == MAT_REUSE_MATRIX) {
5400       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5401       for (i = 0; i < am; i++) {
5402         const PetscInt ncols_d = ai[i + 1] - ai[i];
5403         const PetscInt ncols_o = bi[i + 1] - bi[i];
5404         /* diagonal portion of A */
5405         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5406         /* off-diagonal portion of A */
5407         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5408       }
5409       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5410     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5411     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5412     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba));
5413     if (glob) {
5414       PetscInt cst, *gidx;
5415 
5416       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5417       PetscCall(PetscMalloc1(dn + on, &gidx));
5418       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5419       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5420       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5421     }
5422   }
5423   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5424   PetscFunctionReturn(PETSC_SUCCESS);
5425 }
5426 
5427 /*@C
5428      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5429 
5430     Not Collective
5431 
5432    Input Parameters:
5433 +    A - the matrix
5434 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5435 .    row - index set of rows to extract (or `NULL`)
5436 -    col - index set of columns to extract (or `NULL`)
5437 
5438    Output Parameter:
5439 .    A_loc - the local sequential matrix generated
5440 
5441     Level: developer
5442 
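   Example Usage:
   A minimal sketch (error handling omitted), letting the routine pick all local rows and the nonzero columns:
.vb
   Mat Aloc;
   PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_INITIAL_MATRIX, NULL, NULL, &Aloc));
   /* ... after the values (but not the nonzero pattern) of A change ... */
   PetscCall(MatMPIAIJGetLocalMatCondensed(A, MAT_REUSE_MATRIX, NULL, NULL, &Aloc));
   PetscCall(MatDestroy(&Aloc));
.ve
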
5443 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5444 @*/
5445 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
5446 {
5447   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5448   PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
5449   IS          isrowa, iscola;
5450   Mat        *aloc;
5451   PetscBool   match;
5452 
5453   PetscFunctionBegin;
5454   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
5455   PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
5456   PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5457   if (!row) {
5458     start = A->rmap->rstart;
5459     end   = A->rmap->rend;
5460     PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
5461   } else {
5462     isrowa = *row;
5463   }
5464   if (!col) {
5465     start = A->cmap->rstart;
5466     cmap  = a->garray;
5467     nzA   = a->A->cmap->n;
5468     nzB   = a->B->cmap->n;
5469     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5470     ncols = 0;
5471     for (i = 0; i < nzB; i++) {
5472       if (cmap[i] < start) idx[ncols++] = cmap[i];
5473       else break;
5474     }
5475     imark = i;
5476     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
5477     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
5478     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
5479   } else {
5480     iscola = *col;
5481   }
5482   if (scall != MAT_INITIAL_MATRIX) {
5483     PetscCall(PetscMalloc1(1, &aloc));
5484     aloc[0] = *A_loc;
5485   }
5486   PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
5487   if (!col) { /* attach global id of condensed columns */
5488     PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
5489   }
5490   *A_loc = aloc[0];
5491   PetscCall(PetscFree(aloc));
5492   if (!row) PetscCall(ISDestroy(&isrowa));
5493   if (!col) PetscCall(ISDestroy(&iscola));
5494   PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
5495   PetscFunctionReturn(PETSC_SUCCESS);
5496 }
5497 
5498 /*
5499  * Create a sequential AIJ matrix based on row indices: an entire row is extracted once its index is matched.
5500  * A row could be local or remote. The routine is designed to be scalable in memory so that nothing is based
5501  * on a global size.
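 *
 * A minimal calling sketch (for illustration only; `nr` and `ridx` are hypothetical local variables holding the
 * desired global row indices of P, error checking omitted):
 *
 *   IS  rows;
 *   Mat P_oth;
 *   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)P), nr, ridx, PETSC_COPY_VALUES, &rows));
 *   PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, &P_oth));
 *   PetscCall(ISDestroy(&rows));
 *   PetscCall(MatDestroy(&P_oth));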
5502  * */
5503 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
5504 {
5505   Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
5506   Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
5507   PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
5508   PetscMPIInt            owner;
5509   PetscSFNode           *iremote, *oiremote;
5510   const PetscInt        *lrowindices;
5511   PetscSF                sf, osf;
5512   PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
5513   PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
5514   MPI_Comm               comm;
5515   ISLocalToGlobalMapping mapping;
5516   const PetscScalar     *pd_a, *po_a;
5517 
5518   PetscFunctionBegin;
5519   PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
5520   /* plocalsize is the number of roots
5521    * nrows is the number of leaves
5522    * */
5523   PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
5524   PetscCall(ISGetLocalSize(rows, &nrows));
5525   PetscCall(PetscCalloc1(nrows, &iremote));
5526   PetscCall(ISGetIndices(rows, &lrowindices));
5527   for (i = 0; i < nrows; i++) {
5528     /* Find a remote index and an owner for a row
5529      * The row could be local or remote
5530      * */
5531     owner = 0;
5532     lidx  = 0;
5533     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
5534     iremote[i].index = lidx;
5535     iremote[i].rank  = owner;
5536   }
5537   /* Create SF to communicate how many nonzero columns for each row */
5538   PetscCall(PetscSFCreate(comm, &sf));
5539   /* SF will figure out the number of nonzero columns for each row, and their
5540    * offsets
5541    * */
5542   PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5543   PetscCall(PetscSFSetFromOptions(sf));
5544   PetscCall(PetscSFSetUp(sf));
5545 
5546   PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
5547   PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
5548   PetscCall(PetscCalloc1(nrows, &pnnz));
5549   roffsets[0] = 0;
5550   roffsets[1] = 0;
5551   for (i = 0; i < plocalsize; i++) {
5552     /* diag */
5553     nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
5554     /* off diag */
5555     nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
5556     /* compute offsets so that we know the relative location of each row */
5557     roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
5558     roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
5559   }
5560   PetscCall(PetscCalloc1(2 * nrows, &nlcols));
5561   PetscCall(PetscCalloc1(2 * nrows, &loffsets));
5562   /* 'r' means root, and 'l' means leaf */
5563   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5564   PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5565   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
5566   PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
5567   PetscCall(PetscSFDestroy(&sf));
5568   PetscCall(PetscFree(roffsets));
5569   PetscCall(PetscFree(nrcols));
5570   dntotalcols = 0;
5571   ontotalcols = 0;
5572   ncol        = 0;
5573   for (i = 0; i < nrows; i++) {
5574     pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
5575     ncol    = PetscMax(pnnz[i], ncol);
5576     /* diag */
5577     dntotalcols += nlcols[i * 2 + 0];
5578     /* off diag */
5579     ontotalcols += nlcols[i * 2 + 1];
5580   }
5581   /* We do not need to figure out the right number of columns
5582    * since all the calculations will be done by going through the raw data
5583    * */
5584   PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
5585   PetscCall(MatSetUp(*P_oth));
5586   PetscCall(PetscFree(pnnz));
5587   p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5588   /* diag */
5589   PetscCall(PetscCalloc1(dntotalcols, &iremote));
5590   /* off diag */
5591   PetscCall(PetscCalloc1(ontotalcols, &oiremote));
5592   /* diag */
5593   PetscCall(PetscCalloc1(dntotalcols, &ilocal));
5594   /* off diag */
5595   PetscCall(PetscCalloc1(ontotalcols, &oilocal));
5596   dntotalcols = 0;
5597   ontotalcols = 0;
5598   ntotalcols  = 0;
5599   for (i = 0; i < nrows; i++) {
5600     owner = 0;
5601     PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
5602     /* Set iremote for diag matrix */
5603     for (j = 0; j < nlcols[i * 2 + 0]; j++) {
5604       iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
5605       iremote[dntotalcols].rank  = owner;
5606       /* P_oth is seqAIJ so ilocal needs to point to the first part of memory */
5607       ilocal[dntotalcols++] = ntotalcols++;
5608     }
5609     /* off diag */
5610     for (j = 0; j < nlcols[i * 2 + 1]; j++) {
5611       oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
5612       oiremote[ontotalcols].rank  = owner;
5613       oilocal[ontotalcols++]      = ntotalcols++;
5614     }
5615   }
5616   PetscCall(ISRestoreIndices(rows, &lrowindices));
5617   PetscCall(PetscFree(loffsets));
5618   PetscCall(PetscFree(nlcols));
5619   PetscCall(PetscSFCreate(comm, &sf));
5620   /* P serves as roots and P_oth is leaves
5621    * Diag matrix
5622    * */
5623   PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
5624   PetscCall(PetscSFSetFromOptions(sf));
5625   PetscCall(PetscSFSetUp(sf));
5626 
5627   PetscCall(PetscSFCreate(comm, &osf));
5628   /* Off diag */
5629   PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
5630   PetscCall(PetscSFSetFromOptions(osf));
5631   PetscCall(PetscSFSetUp(osf));
5632   PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5633   PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5634   /* We operate on the matrix internal data for saving memory */
5635   PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5636   PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5637   PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
5638   /* Convert to global indices for diag matrix */
5639   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
5640   PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5641   /* We want P_oth to store global indices */
5642   PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
5643   /* Use memory scalable approach */
5644   PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
5645   PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
5646   PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5647   PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
5648   /* Convert back to local indices */
5649   for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
5650   PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
5651   nout = 0;
5652   PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
5653   PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
5654   PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
5655   /* Exchange values */
5656   PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5657   PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5658   PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5659   PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5660   /* Stop PETSc from shrinking memory */
5661   for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
5662   PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
5663   PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
5664   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5665   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
5666   PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
5667   PetscCall(PetscSFDestroy(&sf));
5668   PetscCall(PetscSFDestroy(&osf));
5669   PetscFunctionReturn(PETSC_SUCCESS);
5670 }
5671 
5672 /*
5673  * Creates a SeqAIJ matrix by taking the rows of B (here the matrix P) that match the nonzero columns of the local A
5674  * This supports MPIAIJ and MAIJ
5675  * */
5676 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
5677 {
5678   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
5679   Mat_SeqAIJ *p_oth;
5680   IS          rows, map;
5681   PetscHMapI  hamp;
5682   PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
5683   MPI_Comm    comm;
5684   PetscSF     sf, osf;
5685   PetscBool   has;
5686 
5687   PetscFunctionBegin;
5688   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5689   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
5690   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5691    *  and then create a submatrix (that often is an overlapping matrix)
5692    * */
5693   if (reuse == MAT_INITIAL_MATRIX) {
5694     /* Use a hash table to figure out unique keys */
5695     PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
5696     PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
5697     count = 0;
5698     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5699     for (i = 0; i < a->B->cmap->n; i++) {
5700       key = a->garray[i] / dof;
5701       PetscCall(PetscHMapIHas(hamp, key, &has));
5702       if (!has) {
5703         mapping[i] = count;
5704         PetscCall(PetscHMapISet(hamp, key, count++));
5705       } else {
5706         /* Current 'i' has the same value as the previous step */
5707         mapping[i] = count - 1;
5708       }
5709     }
5710     PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
5711     PetscCall(PetscHMapIGetSize(hamp, &htsize));
5712     PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
5713     PetscCall(PetscCalloc1(htsize, &rowindices));
5714     off = 0;
5715     PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
5716     PetscCall(PetscHMapIDestroy(&hamp));
5717     PetscCall(PetscSortInt(htsize, rowindices));
5718     PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
5719     /* In case the matrix was already created but the user wants to recreate it */
5720     PetscCall(MatDestroy(P_oth));
5721     PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
5722     PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
5723     PetscCall(ISDestroy(&map));
5724     PetscCall(ISDestroy(&rows));
5725   } else if (reuse == MAT_REUSE_MATRIX) {
5726     /* If matrix was already created, we simply update values using SF objects
5727      * that were attached to the matrix earlier.
5728      */
5729     const PetscScalar *pd_a, *po_a;
5730 
5731     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
5732     PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
5733     PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
5734     p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
5735     /* Update values in place */
5736     PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
5737     PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
5738     PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5739     PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5740     PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
5741     PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
5742     PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
5743     PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
5744   } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
5745   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
5746   PetscFunctionReturn(PETSC_SUCCESS);
5747 }
5748 
5749 /*@C
5750   MatGetBrowsOfAcols - Returns the `IS` objects that contain the rows of `B` that match the nonzero columns of the local `A`
5751 
5752   Collective
5753 
5754   Input Parameters:
5755 + A - the first matrix in `MATMPIAIJ` format
5756 . B - the second matrix in `MATMPIAIJ` format
5757 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5758 
5759   Output Parameters:
5760 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
5761 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output
5762 - B_seq - the sequential matrix generated
5763 
5764   Level: developer
5765 
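  Example Usage:
  A minimal sketch (error handling omitted); the returned index sets are kept so the call can be repeated with `MAT_REUSE_MATRIX`:
.vb
  IS  rowb = NULL, colb = NULL;
  Mat B_seq;
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_INITIAL_MATRIX, &rowb, &colb, &B_seq));
  /* ... after the values (but not the nonzero pattern) of B change ... */
  PetscCall(MatGetBrowsOfAcols(A, B, MAT_REUSE_MATRIX, &rowb, &colb, &B_seq));
  PetscCall(ISDestroy(&rowb));
  PetscCall(ISDestroy(&colb));
  PetscCall(MatDestroy(&B_seq));
.ve
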
5766 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
5767 @*/
5768 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
5769 {
5770   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
5771   PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
5772   IS          isrowb, iscolb;
5773   Mat        *bseq = NULL;
5774 
5775   PetscFunctionBegin;
5776   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5777              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5778   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));
5779 
5780   if (scall == MAT_INITIAL_MATRIX) {
5781     start = A->cmap->rstart;
5782     cmap  = a->garray;
5783     nzA   = a->A->cmap->n;
5784     nzB   = a->B->cmap->n;
5785     PetscCall(PetscMalloc1(nzA + nzB, &idx));
5786     ncols = 0;
5787     for (i = 0; i < nzB; i++) { /* row < local row index */
5788       if (cmap[i] < start) idx[ncols++] = cmap[i];
5789       else break;
5790     }
5791     imark = i;
5792     for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
5793     for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5794     PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
5795     PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
5796   } else {
5797     PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5798     isrowb = *rowb;
5799     iscolb = *colb;
5800     PetscCall(PetscMalloc1(1, &bseq));
5801     bseq[0] = *B_seq;
5802   }
5803   PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
5804   *B_seq = bseq[0];
5805   PetscCall(PetscFree(bseq));
5806   if (!rowb) {
5807     PetscCall(ISDestroy(&isrowb));
5808   } else {
5809     *rowb = isrowb;
5810   }
5811   if (!colb) {
5812     PetscCall(ISDestroy(&iscolb));
5813   } else {
5814     *colb = iscolb;
5815   }
5816   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
5817   PetscFunctionReturn(PETSC_SUCCESS);
5818 }
5819 
5820 /*
5821     MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking the rows of B that match the nonzero columns
5822     of the OFF-DIAGONAL portion of the local A
5823 
5824     Collective
5825 
5826    Input Parameters:
5827 +    A,B - the matrices in `MATMPIAIJ` format
5828 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5829 
5830    Output Parameters:
5831 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5832 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5833 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5834 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5835 
5836     Developer Note:
5837     This directly accesses information inside the VecScatter associated with the matrix-vector product
5838     for this matrix. This is not desirable.
5839 
5840     Level: developer
5841 
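    Example Usage:
    A sketch of the intended call pattern, inferred from the parameters above (not a drop-in snippet):

      PetscInt  *startsj_s = NULL, *startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat        B_oth;
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_INITIAL_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
      /* ... after the values (but not the nonzero pattern) of B change ... */
      PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, B, MAT_REUSE_MATRIX, &startsj_s, &startsj_r, &bufa, &B_oth));
      /* the caller eventually releases startsj_s/startsj_r with PetscFree2(), bufa with PetscFree(), and destroys B_oth */
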
5842 */
5843 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5844 {
5845   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5846   Mat_SeqAIJ        *b_oth;
5847   VecScatter         ctx;
5848   MPI_Comm           comm;
5849   const PetscMPIInt *rprocs, *sprocs;
5850   const PetscInt    *srow, *rstarts, *sstarts;
5851   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5852   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5853   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5854   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5855   PetscMPIInt        size, tag, rank, nreqs;
5856 
5857   PetscFunctionBegin;
5858   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5859   PetscCallMPI(MPI_Comm_size(comm, &size));
5860 
5861   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
5862              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5863   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5864   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5865 
5866   if (size == 1) {
5867     startsj_s = NULL;
5868     bufa_ptr  = NULL;
5869     *B_oth    = NULL;
5870     PetscFunctionReturn(PETSC_SUCCESS);
5871   }
5872 
5873   ctx = a->Mvctx;
5874   tag = ((PetscObject)ctx)->tag;
5875 
5876   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5877   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5878   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5879   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5880   PetscCall(PetscMalloc1(nreqs, &reqs));
5881   rwaits = reqs;
5882   swaits = reqs + nrecvs;
5883 
5884   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5885   if (scall == MAT_INITIAL_MATRIX) {
5886     /* i-array */
5887     /*  post receives */
5888     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5889     for (i = 0; i < nrecvs; i++) {
5890       rowlen = rvalues + rstarts[i] * rbs;
5891       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5892       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5893     }
5894 
5895     /* pack the outgoing message */
5896     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5897 
5898     sstartsj[0] = 0;
5899     rstartsj[0] = 0;
5900     len         = 0; /* total length of j or a array to be sent */
5901     if (nsends) {
5902       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5903       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5904     }
5905     for (i = 0; i < nsends; i++) {
5906       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5907       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5908       for (j = 0; j < nrows; j++) {
5909         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5910         for (l = 0; l < sbs; l++) {
5911           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5912 
5913           rowlen[j * sbs + l] = ncols;
5914 
5915           len += ncols;
5916           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5917         }
5918         k++;
5919       }
5920       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5921 
5922       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5923     }
5924     /* recvs and sends of i-array are completed */
5925     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5926     PetscCall(PetscFree(svalues));
5927 
5928     /* allocate buffers for sending j and a arrays */
5929     PetscCall(PetscMalloc1(len + 1, &bufj));
5930     PetscCall(PetscMalloc1(len + 1, &bufa));
5931 
5932     /* create i-array of B_oth */
5933     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5934 
5935     b_othi[0] = 0;
5936     len       = 0; /* total length of j or a array to be received */
5937     k         = 0;
5938     for (i = 0; i < nrecvs; i++) {
5939       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5940       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5941       for (j = 0; j < nrows; j++) {
5942         b_othi[k + 1] = b_othi[k] + rowlen[j];
5943         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5944         k++;
5945       }
5946       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5947     }
5948     PetscCall(PetscFree(rvalues));
5949 
5950     /* allocate space for j and a arrays of B_oth */
5951     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5952     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5953 
5954     /* j-array */
5955     /*  post receives of j-array */
5956     for (i = 0; i < nrecvs; i++) {
5957       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5958       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5959     }
5960 
5961     /* pack the outgoing message j-array */
5962     if (nsends) k = sstarts[0];
5963     for (i = 0; i < nsends; i++) {
5964       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5965       bufJ  = bufj + sstartsj[i];
5966       for (j = 0; j < nrows; j++) {
5967         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5968         for (ll = 0; ll < sbs; ll++) {
5969           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5970           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5971           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5972         }
5973       }
5974       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5975     }
5976 
5977     /* recvs and sends of j-array are completed */
5978     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5979   } else if (scall == MAT_REUSE_MATRIX) {
5980     sstartsj = *startsj_s;
5981     rstartsj = *startsj_r;
5982     bufa     = *bufa_ptr;
5983     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5984     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
5985   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5986 
5987   /* a-array */
5988   /*  post receives of a-array */
5989   for (i = 0; i < nrecvs; i++) {
5990     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5991     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
5992   }
5993 
5994   /* pack the outgoing message a-array */
5995   if (nsends) k = sstarts[0];
5996   for (i = 0; i < nsends; i++) {
5997     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5998     bufA  = bufa + sstartsj[i];
5999     for (j = 0; j < nrows; j++) {
6000       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6001       for (ll = 0; ll < sbs; ll++) {
6002         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6003         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6004         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6005       }
6006     }
6007     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6008   }
6009   /* recvs and sends of a-array are completed */
6010   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6011   PetscCall(PetscFree(reqs));
6012 
6013   if (scall == MAT_INITIAL_MATRIX) {
6014     /* put together the new matrix */
6015     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6016 
6017     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6018     /* Since these are PETSc arrays, change flags to free them as necessary. */
6019     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6020     b_oth->free_a  = PETSC_TRUE;
6021     b_oth->free_ij = PETSC_TRUE;
6022     b_oth->nonew   = 0;
6023 
6024     PetscCall(PetscFree(bufj));
6025     if (!startsj_s || !bufa_ptr) {
6026       PetscCall(PetscFree2(sstartsj, rstartsj));
6027       PetscCall(PetscFree(bufa_ptr));
6028     } else {
6029       *startsj_s = sstartsj;
6030       *startsj_r = rstartsj;
6031       *bufa_ptr  = bufa;
6032     }
6033   } else if (scall == MAT_REUSE_MATRIX) {
6034     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6035   }
6036 
6037   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6038   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6039   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6040   PetscFunctionReturn(PETSC_SUCCESS);
6041 }
6042 
6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6046 #if defined(PETSC_HAVE_MKL_SPARSE)
6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6048 #endif
6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6051 #if defined(PETSC_HAVE_ELEMENTAL)
6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6053 #endif
6054 #if defined(PETSC_HAVE_SCALAPACK)
6055 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6056 #endif
6057 #if defined(PETSC_HAVE_HYPRE)
6058 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6059 #endif
6060 #if defined(PETSC_HAVE_CUDA)
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6062 #endif
6063 #if defined(PETSC_HAVE_HIP)
6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6065 #endif
6066 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6067 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6068 #endif
6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6070 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6071 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6072 
6073 /*
6074     Computes (B'*A')' since computing B*A directly is untenable
6075 
6076                n                       p                          p
6077         [             ]       [             ]         [                 ]
6078       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6079         [             ]       [             ]         [                 ]
6080 
6081 */
6082 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6083 {
6084   Mat At, Bt, Ct;
6085 
6086   PetscFunctionBegin;
6087   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6088   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6089   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6090   PetscCall(MatDestroy(&At));
6091   PetscCall(MatDestroy(&Bt));
6092   PetscCall(MatTransposeSetPrecursor(Ct, C));
6093   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6094   PetscCall(MatDestroy(&Ct));
6095   PetscFunctionReturn(PETSC_SUCCESS);
6096 }
6097 
6098 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6099 {
6100   PetscBool cisdense;
6101 
6102   PetscFunctionBegin;
6103   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6104   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6105   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6106   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6107   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6108   PetscCall(MatSetUp(C));
6109 
6110   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6111   PetscFunctionReturn(PETSC_SUCCESS);
6112 }
6113 
6114 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6115 {
6116   Mat_Product *product = C->product;
6117   Mat          A = product->A, B = product->B;
6118 
6119   PetscFunctionBegin;
6120   PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
6121              A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6122   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6123   C->ops->productsymbolic = MatProductSymbolic_AB;
6124   PetscFunctionReturn(PETSC_SUCCESS);
6125 }
6126 
6127 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6128 {
6129   Mat_Product *product = C->product;
6130 
6131   PetscFunctionBegin;
6132   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6133   PetscFunctionReturn(PETSC_SUCCESS);
6134 }
6135 
6136 /*
6137    Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6138 
6139   Input Parameters:
6140 
6141     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6142     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6143 
6144     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6145 
6146     For Set1, j1[] contains column indices of the nonzeros.
6147     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
6148     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6149     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6150 
6151     Similar for Set2.
6152 
6153     This routine merges the two sets of nonzeros row by row and removes repeats.
6154 
6155   Output Parameters: (memory is allocated by the caller)
6156 
6157     i[],j[]: the CSR of the merged matrix, which has m rows.
6158     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6159     imap2[]: similar to imap1[], but for Set2.
6160     Note we order nonzeros row-by-row and from left to right.
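
    Illustrative example (single local row, i.e. m = 1), not taken from any particular caller:
      Set1: j1 = [2,2,5],   rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]   (unique columns 2 and 5)
      Set2: j2 = [3,5,5,7], rowBegin2 = [0], rowEnd2 = [4], jmap2 = [0,1,3,4] (unique columns 3, 5 and 7)
    The merged row has the unique columns {2,3,5,7}, so the routine fills
      i = [0,4], j = [2,3,5,7], imap1 = [0,2], imap2 = [1,2,3]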
6161 */
6162 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
6163 {
6164   PetscInt   r, m; /* Row index of mat */
6165   PetscCount t, t1, t2, b1, e1, b2, e2;
6166 
6167   PetscFunctionBegin;
6168   PetscCall(MatGetLocalSize(mat, &m, NULL));
6169   t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
6170   i[0]        = 0;
6171   for (r = 0; r < m; r++) { /* Do row by row merging */
6172     b1 = rowBegin1[r];
6173     e1 = rowEnd1[r];
6174     b2 = rowBegin2[r];
6175     e2 = rowEnd2[r];
6176     while (b1 < e1 && b2 < e2) {
6177       if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
6178         j[t]      = j1[b1];
6179         imap1[t1] = t;
6180         imap2[t2] = t;
6181         b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */
6182         b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */
6183         t1++;
6184         t2++;
6185         t++;
6186       } else if (j1[b1] < j2[b2]) {
6187         j[t]      = j1[b1];
6188         imap1[t1] = t;
6189         b1 += jmap1[t1 + 1] - jmap1[t1];
6190         t1++;
6191         t++;
6192       } else {
6193         j[t]      = j2[b2];
6194         imap2[t2] = t;
6195         b2 += jmap2[t2 + 1] - jmap2[t2];
6196         t2++;
6197         t++;
6198       }
6199     }
6200     /* Merge the remaining in either j1[] or j2[] */
6201     while (b1 < e1) {
6202       j[t]      = j1[b1];
6203       imap1[t1] = t;
6204       b1 += jmap1[t1 + 1] - jmap1[t1];
6205       t1++;
6206       t++;
6207     }
6208     while (b2 < e2) {
6209       j[t]      = j2[b2];
6210       imap2[t2] = t;
6211       b2 += jmap2[t2 + 1] - jmap2[t2];
6212       t2++;
6213       t++;
6214     }
6215     i[r + 1] = t;
6216   }
6217   PetscFunctionReturn(PETSC_SUCCESS);
6218 }
6219 
6220 /*
6221   Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6222 
6223   Input Parameters:
6224     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6225     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6226       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6227 
6228       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6229       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6230 
6231   Output Parameters:
6232     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6233     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6234       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6235       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6236 
6237     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6238       Atot: number of entries belonging to the diagonal block.
6239       Annz: number of unique nonzeros belonging to the diagonal block.
6240       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6241         repeats (i.e., same 'i,j' pair).
6242       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6243         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6247 
6248     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6249 
6250     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6251 */
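
    Illustrative example (for orientation only): suppose this process owns rows [0,2) and columns [0,2) of mat and the
    input is n = 4, i = [0,0,0,1], j = [3,1,1,0], perm = [0,1,2,3]. Row 0 is sorted in place to j = [1,1,3] (with perm
    reordered so its diagonal-block entries come first, e.g. perm = [1,2,0]), giving
      rowBegin = [0,3], rowMid = [2,4], rowEnd = [3,4]
      Atot = 3, Annz = 2, Aperm = e.g. [1,2,3], Ajmap = [0,2,3]   (diagonal block: column 1 twice in row 0, column 0 once in row 1)
      Btot = 1, Bnnz = 1, Bperm = [0],          Bjmap = [0,1]     (off-diagonal block: column 3 once in row 0)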
6252 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
6253 {
6254   PetscInt    cstart, cend, rstart, rend, row, col;
6255   PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
6256   PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
6257   PetscCount  k, m, p, q, r, s, mid;
6258   PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;
6259 
6260   PetscFunctionBegin;
6261   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6262   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6263   m = rend - rstart;
6264 
6265   for (k = 0; k < n; k++) {
6266     if (i[k] >= 0) break;
6267   } /* Skip negative rows */
6268 
6269   /* Process [k,n): sort and partition each local row into diag and offdiag portions,
6270      fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
6271   */
6272   while (k < n) {
6273     row = i[k];
6274     /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
6275     for (s = k; s < n; s++)
6276       if (i[s] != row) break;
6277     for (p = k; p < s; p++) {
6278       if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
6279       else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
6280     }
6281     PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
6282     PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
6283     rowBegin[row - rstart] = k;
6284     rowMid[row - rstart]   = mid;
6285     rowEnd[row - rstart]   = s;
6286 
6287     /* Count nonzeros of this diag/offdiag row, which might have repeats */
6288     Atot += mid - k;
6289     Btot += s - mid;
6290 
6291     /* Count unique nonzeros of this diag/offdiag row */
6292     for (p = k; p < mid;) {
6293       col = j[p];
6294       do {
6295         j[p] += PETSC_MAX_INT;
6296         p++;
6297       } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
6298       Annz++;
6299     }
6300 
6301     for (p = mid; p < s;) {
6302       col = j[p];
6303       do {
6304         p++;
6305       } while (p < s && j[p] == col);
6306       Bnnz++;
6307     }
6308     k = s;
6309   }
6310 
6311   /* Allocation according to Atot, Btot, Annz, Bnnz */
6312   PetscCall(PetscMalloc1(Atot, &Aperm));
6313   PetscCall(PetscMalloc1(Btot, &Bperm));
6314   PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
6315   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));
6316 
6317   /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
6318   Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
6319   for (r = 0; r < m; r++) {
6320     k   = rowBegin[r];
6321     mid = rowMid[r];
6322     s   = rowEnd[r];
6323     PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
6324     PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
6325     Atot += mid - k;
6326     Btot += s - mid;
6327 
6328     /* Scan column indices in this row and find out how many repeats each unique nonzero has */
6329     for (p = k; p < mid;) {
6330       col = j[p];
6331       q   = p;
6332       do {
6333         p++;
6334       } while (p < mid && j[p] == col);
6335       Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
6336       Annz++;
6337     }
6338 
6339     for (p = mid; p < s;) {
6340       col = j[p];
6341       q   = p;
6342       do {
6343         p++;
6344       } while (p < s && j[p] == col);
6345       Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
6346       Bnnz++;
6347     }
6348   }
6349   /* Output */
6350   *Aperm_ = Aperm;
6351   *Annz_  = Annz;
6352   *Atot_  = Atot;
6353   *Ajmap_ = Ajmap;
6354   *Bperm_ = Bperm;
6355   *Bnnz_  = Bnnz;
6356   *Btot_  = Btot;
6357   *Bjmap_ = Bjmap;
6358   PetscFunctionReturn(PETSC_SUCCESS);
6359 }
6360 
6361 /*
6362   Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6363 
6364   Input Parameters:
6365     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6366     nnz:  number of unique nonzeros in the merged matrix
6367     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
6368     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6369 
6370   Output Parameter: (memory is allocated by the caller)
6371     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6372 
6373   Example:
6374     nnz1 = 4
6375     nnz  = 6
6376     imap = [1,3,4,5]
6377     jmap = [0,3,5,6,7]
6378    then,
6379     jmap_new = [0,0,3,3,5,6,7]
6380 */
6381 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
6382 {
6383   PetscCount k, p;
6384 
6385   PetscFunctionBegin;
6386   jmap_new[0] = 0;
6387   p           = nnz;                /* p loops over jmap_new[] backwards */
6388   for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
6389     for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
6390   }
6391   for (; p >= 0; p--) jmap_new[p] = jmap[0];
6392   PetscFunctionReturn(PETSC_SUCCESS);
6393 }
6394 
6395 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6396 {
6397   MPI_Comm    comm;
6398   PetscMPIInt rank, size;
6399   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6400   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6401   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6402 
6403   PetscFunctionBegin;
6404   PetscCall(PetscFree(mpiaij->garray));
6405   PetscCall(VecDestroy(&mpiaij->lvec));
6406 #if defined(PETSC_USE_CTABLE)
6407   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6408 #else
6409   PetscCall(PetscFree(mpiaij->colmap));
6410 #endif
6411   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6412   mat->assembled     = PETSC_FALSE;
6413   mat->was_assembled = PETSC_FALSE;
6414   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6415 
6416   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6417   PetscCallMPI(MPI_Comm_size(comm, &size));
6418   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6419   PetscCall(PetscLayoutSetUp(mat->rmap));
6420   PetscCall(PetscLayoutSetUp(mat->cmap));
6421   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6422   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6423   PetscCall(MatGetLocalSize(mat, &m, &n));
6424   PetscCall(MatGetSize(mat, &M, &N));
6425 
6426   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6427   /* entries come first, then local rows, then remote rows.                     */
6428   PetscCount n1 = coo_n, *perm1;
6429   PetscInt  *i1 = coo_i, *j1 = coo_j;
6430 
6431   PetscCall(PetscMalloc1(n1, &perm1));
6432   for (k = 0; k < n1; k++) perm1[k] = k;
6433 
6434   /* Manipulate indices so that entries with negative row or col indices will have the smallest
6435      row indices, local entries will have greater but negative row indices, and remote entries
6436      will have positive row indices.
6437   */
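  /* For illustration (hypothetical values): with rstart = 10 and rend = 20, an entry with (i,j) = (-1,3) gets
     i = PETSC_MIN_INT (ignored), (12,7) gets i = 12 - PETSC_MAX_INT (local), and (25,7) keeps i = 25 (remote),
     so the sort below groups ignored, local, and remote entries in that order. */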
6438   for (k = 0; k < n1; k++) {
6439     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6440     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6441     else {
6442       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6443       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6444     }
6445   }
6446 
6447   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6448   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6449   for (k = 0; k < n1; k++) {
6450     if (i1[k] > PETSC_MIN_INT) break;
6451   }                                                                               /* Advance k to the first entry we need to take care of */
6452   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6453   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6454 
6455   /*           Split local rows into diag/offdiag portions                      */
6456   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6457   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6458   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6459 
6460   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6461   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6462   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6463 
6464   /*           Send remote rows to their owner                                  */
6465   /* Find which rows should be sent to which remote ranks */
6466   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6467   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6468   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6469   const PetscInt *ranges;
6470   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6471 
6472   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6473   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6474   for (k = rem; k < n1;) {
6475     PetscMPIInt owner;
6476     PetscInt    firstRow, lastRow;
6477 
6478     /* Locate a row range */
6479     firstRow = i1[k]; /* first row of this owner */
6480     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6481     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6482 
6483     /* Find the first index 'p' in [k,n1) with i[p] belonging to the next owner */
6484     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6485 
6486     /* All entries in [k,p) belong to this remote owner */
6487     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6488       PetscMPIInt *sendto2;
6489       PetscInt    *nentries2;
6490       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6491 
6492       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6493       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6494       PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
6495       PetscCall(PetscFree2(sendto, nentries));
6496       sendto   = sendto2;
6497       nentries = nentries2;
6498       maxNsend = maxNsend2;
6499     }
6500     sendto[nsend]   = owner;
6501     nentries[nsend] = p - k;
6502     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6503     nsend++;
6504     k = p;
6505   }
6506 
6507   /* Build 1st SF to know offsets on remote to send data */
6508   PetscSF      sf1;
6509   PetscInt     nroots = 1, nroots2 = 0;
6510   PetscInt     nleaves = nsend, nleaves2 = 0;
6511   PetscInt    *offsets;
6512   PetscSFNode *iremote;
6513 
6514   PetscCall(PetscSFCreate(comm, &sf1));
6515   PetscCall(PetscMalloc1(nsend, &iremote));
6516   PetscCall(PetscMalloc1(nsend, &offsets));
6517   for (k = 0; k < nsend; k++) {
6518     iremote[k].rank  = sendto[k];
6519     iremote[k].index = 0;
6520     nleaves2 += nentries[k];
6521     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6522   }
6523   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6524   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6525   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* If nroots2 overflowed, the check on offsets[] below would catch it */
6526   PetscCall(PetscSFDestroy(&sf1));
6527   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6528 
6529   /* Build 2nd SF to send remote COOs to their owner */
6530   PetscSF sf2;
6531   nroots  = nroots2;
6532   nleaves = nleaves2;
6533   PetscCall(PetscSFCreate(comm, &sf2));
6534   PetscCall(PetscSFSetFromOptions(sf2));
6535   PetscCall(PetscMalloc1(nleaves, &iremote));
6536   p = 0;
6537   for (k = 0; k < nsend; k++) {
6538     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6539     for (q = 0; q < nentries[k]; q++, p++) {
6540       iremote[p].rank  = sendto[k];
6541       iremote[p].index = offsets[k] + q;
6542     }
6543   }
6544   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6545 
6546   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6547   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6548 
6549   /* Send the remote COOs to their owner */
6550   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6551   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6552   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6553   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6554   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6555   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6556   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6557 
6558   PetscCall(PetscFree(offsets));
6559   PetscCall(PetscFree2(sendto, nentries));
6560 
6561   /* Sort received COOs by row along with the permutation array     */
6562   for (k = 0; k < n2; k++) perm2[k] = k;
6563   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6564 
6565   /* Split received COOs into diag/offdiag portions                 */
6566   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6567   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6568   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6569 
6570   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6571   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6572 
6573   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6574   PetscInt *Ai, *Bi;
6575   PetscInt *Aj, *Bj;
6576 
6577   PetscCall(PetscMalloc1(m + 1, &Ai));
6578   PetscCall(PetscMalloc1(m + 1, &Bi));
6579   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6580   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6581 
6582   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6583   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6584   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6585   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6586   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6587 
6588   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6589   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6590 
6591   /* Expand Ajmap1/Bjmap1 so that they are indexed by the nonzeros of A/B, since we */
6592   /* expect that most nonzeros in A/B have local contributing entries               */
6593   PetscInt    Annz = Ai[m];
6594   PetscInt    Bnnz = Bi[m];
6595   PetscCount *Ajmap1_new, *Bjmap1_new;
6596 
6597   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6598   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6599 
6600   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6601   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6602 
6603   PetscCall(PetscFree(Aimap1));
6604   PetscCall(PetscFree(Ajmap1));
6605   PetscCall(PetscFree(Bimap1));
6606   PetscCall(PetscFree(Bjmap1));
6607   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6608   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6609   PetscCall(PetscFree(perm1));
6610   PetscCall(PetscFree3(i2, j2, perm2));
6611 
6612   Ajmap1 = Ajmap1_new;
6613   Bjmap1 = Bjmap1_new;
6614 
6615   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6616   if (Annz < Annz1 + Annz2) {
6617     PetscInt *Aj_new;
6618     PetscCall(PetscMalloc1(Annz, &Aj_new));
6619     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6620     PetscCall(PetscFree(Aj));
6621     Aj = Aj_new;
6622   }
6623 
6624   if (Bnnz < Bnnz1 + Bnnz2) {
6625     PetscInt *Bj_new;
6626     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6627     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6628     PetscCall(PetscFree(Bj));
6629     Bj = Bj_new;
6630   }
6631 
6632   /* Create new submatrices for on-process and off-process coupling                  */
6633   PetscScalar *Aa, *Ba;
6634   MatType      rtype;
6635   Mat_SeqAIJ  *a, *b;
6636   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6637   PetscCall(PetscCalloc1(Bnnz, &Ba));
6638   /* make Aj[] local, i.e., based off the start column of the diagonal portion */
6639   if (cstart) {
6640     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6641   }
6642   PetscCall(MatDestroy(&mpiaij->A));
6643   PetscCall(MatDestroy(&mpiaij->B));
6644   PetscCall(MatGetRootType_Private(mat, &rtype));
6645   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6646   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6647   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6648 
6649   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6650   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6651   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6652   a->free_a = b->free_a = PETSC_TRUE;
6653   a->free_ij = b->free_ij = PETSC_TRUE;
6654 
6655   /* conversion must happen AFTER multiply setup */
6656   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6657   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6658   PetscCall(VecDestroy(&mpiaij->lvec));
6659   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6660 
6661   mpiaij->coo_n   = coo_n;
6662   mpiaij->coo_sf  = sf2;
6663   mpiaij->sendlen = nleaves;
6664   mpiaij->recvlen = nroots;
6665 
6666   mpiaij->Annz = Annz;
6667   mpiaij->Bnnz = Bnnz;
6668 
6669   mpiaij->Annz2 = Annz2;
6670   mpiaij->Bnnz2 = Bnnz2;
6671 
6672   mpiaij->Atot1 = Atot1;
6673   mpiaij->Atot2 = Atot2;
6674   mpiaij->Btot1 = Btot1;
6675   mpiaij->Btot2 = Btot2;
6676 
6677   mpiaij->Ajmap1 = Ajmap1;
6678   mpiaij->Aperm1 = Aperm1;
6679 
6680   mpiaij->Bjmap1 = Bjmap1;
6681   mpiaij->Bperm1 = Bperm1;
6682 
6683   mpiaij->Aimap2 = Aimap2;
6684   mpiaij->Ajmap2 = Ajmap2;
6685   mpiaij->Aperm2 = Aperm2;
6686 
6687   mpiaij->Bimap2 = Bimap2;
6688   mpiaij->Bjmap2 = Bjmap2;
6689   mpiaij->Bperm2 = Bperm2;
6690 
6691   mpiaij->Cperm1 = Cperm1;
6692 
6693   /* Allocate the send/recv buffers at preallocation time; if they end up unused, the host-side cost is negligible */
6694   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6695   PetscFunctionReturn(PETSC_SUCCESS);
6696 }
6697 
6698 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6699 {
6700   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6701   Mat               A = mpiaij->A, B = mpiaij->B;
6702   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6703   PetscScalar      *Aa, *Ba;
6704   PetscScalar      *sendbuf = mpiaij->sendbuf;
6705   PetscScalar      *recvbuf = mpiaij->recvbuf;
6706   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6707   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6708   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6709   const PetscCount *Cperm1 = mpiaij->Cperm1;
6710 
6711   PetscFunctionBegin;
6712   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6713   PetscCall(MatSeqAIJGetArray(B, &Ba));
6714 
6715   /* Pack entries to be sent to remote */
6716   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6717 
6718   /* Send remote entries to their owner and overlap the communication with local computation */
6719   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6720   /* Add local entries to A and B */
6721   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6722     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
6723     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6724     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6725   }
6726   for (PetscCount i = 0; i < Bnnz; i++) {
6727     PetscScalar sum = 0.0;
6728     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6729     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6730   }
6731   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6732 
6733   /* Add received remote entries to A and B */
6734   for (PetscCount i = 0; i < Annz2; i++) {
6735     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6736   }
6737   for (PetscCount i = 0; i < Bnnz2; i++) {
6738     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6739   }
6740   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6741   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6742   PetscFunctionReturn(PETSC_SUCCESS);
6743 }
6744 
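/*
  For orientation, a minimal sketch (hypothetical arrays, error checking omitted) of the user-facing COO assembly
  path that MatSetPreallocationCOO_MPIAIJ() and MatSetValuesCOO_MPIAIJ() implement for MATMPIAIJ:

    PetscInt    coo_i[] = {0, 0, 1};           // global row indices; repeated (i,j) pairs are allowed
    PetscInt    coo_j[] = {0, 2, 1};           // global column indices
    PetscScalar v[]     = {1.0, 2.0, 3.0};

    PetscCall(MatSetPreallocationCOO(A, 3, coo_i, coo_j)); // may modify coo_i[]/coo_j[]
    PetscCall(MatSetValuesCOO(A, v, ADD_VALUES));          // values given in the original (coo_i, coo_j) order

  The dispatch happens through the "MatSetPreallocationCOO_C" and "MatSetValuesCOO_C" functions composed in
  MatCreate_MPIAIJ() below.
*/
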
6745 /*MC
6746    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6747 
6748    Options Database Keys:
6749 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6750 
6751    Level: beginner
6752 
6753    Notes:
6754    `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values;
6755     in this case the values associated with the rows and columns one passes in are set to zero
6756     in the matrix.
6757 
6758     `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6759     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.
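
    A minimal sketch of typical use (hypothetical global sizes M and N; error checking omitted):
.vb
    Mat A;
    MatCreate(comm, &A);
    MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, N);
    MatSetType(A, MATMPIAIJ);
    MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL);
    // ... MatSetValues(A, ...) on locally owned rows ...
    MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve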
6760 
6761 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6762 M*/
6763 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6764 {
6765   Mat_MPIAIJ *b;
6766   PetscMPIInt size;
6767 
6768   PetscFunctionBegin;
6769   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
6770 
6771   PetscCall(PetscNew(&b));
6772   B->data = (void *)b;
6773   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
6774   B->assembled  = PETSC_FALSE;
6775   B->insertmode = NOT_SET_VALUES;
6776   b->size       = size;
6777 
6778   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
6779 
6780   /* build cache for off array entries formed */
6781   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
6782 
6783   b->donotstash  = PETSC_FALSE;
6784   b->colmap      = NULL;
6785   b->garray      = NULL;
6786   b->roworiented = PETSC_TRUE;
6787 
6788   /* stuff used for matrix vector multiply */
6789   b->lvec  = NULL;
6790   b->Mvctx = NULL;
6791 
6792   /* stuff for MatGetRow() */
6793   b->rowindices   = NULL;
6794   b->rowvalues    = NULL;
6795   b->getrowactive = PETSC_FALSE;
6796 
6797   /* flexible pointer used in CUSPARSE classes */
6798   b->spptr = NULL;
6799 
6800   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
6801   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
6802   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
6803   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
6804   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
6805   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
6806   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
6807   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
6808   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
6809   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
6810 #if defined(PETSC_HAVE_CUDA)
6811   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
6812 #endif
6813 #if defined(PETSC_HAVE_HIP)
6814   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
6815 #endif
6816 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6817   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
6818 #endif
6819 #if defined(PETSC_HAVE_MKL_SPARSE)
6820   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
6821 #endif
6822   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
6823   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
6824   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
6825   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
6826 #if defined(PETSC_HAVE_ELEMENTAL)
6827   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
6828 #endif
6829 #if defined(PETSC_HAVE_SCALAPACK)
6830   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
6831 #endif
6832   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
6833   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
6834 #if defined(PETSC_HAVE_HYPRE)
6835   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
6836   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
6837 #endif
6838   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
6839   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
6840   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
6841   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
6842   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
6843   PetscFunctionReturn(PETSC_SUCCESS);
6844 }
6845 
6846 /*@C
6847      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6848          and "off-diagonal" part of the matrix in CSR format.
6849 
6850    Collective
6851 
6852    Input Parameters:
6853 +  comm - MPI communicator
6854 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
6855 .  n - This value should be the same as the local size used in creating the
6856        x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
6857        calculated if `N` is given). For square matrices `n` is almost always `m`.
6858 .  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
6859 .  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
6860 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6861 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6862 .   a - matrix values
6863 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6864 .   oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
6865 -   oa - matrix values
6866 
6867    Output Parameter:
6868 .   mat - the matrix
6869 
6870    Level: advanced
6871 
6872    Notes:
6873        The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6874        must free the arrays once the matrix has been destroyed and not before.
6875 
6876        The `i` and `j` indices are 0 based
6877 
6878        See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix
6879 
6880        This sets local rows and cannot be used to set off-processor values.
6881 
6882        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6883        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6884        not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
6885        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6886        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6887        communication if it is known that only local entries will be set.
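
       For illustration (a hypothetical 4x4 matrix on two ranks, each owning 2 rows and 2 columns), the arrays
       on rank 0 holding the entries A(0,0)=1, A(0,2)=2, A(1,1)=3 and A(1,3)=4 would be
.vb
       i  = {0, 1, 2},  j  = {0, 1},  a  = {1.0, 3.0}   // diagonal block, local column indices
       oi = {0, 1, 2},  oj = {2, 3},  oa = {2.0, 4.0}   // off-diagonal block, global column indices
.ve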
6888 
6889 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6890           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6891 @*/
6892 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
6893 {
6894   Mat_MPIAIJ *maij;
6895 
6896   PetscFunctionBegin;
6897   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
6898   PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
6899   PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
6900   PetscCall(MatCreate(comm, mat));
6901   PetscCall(MatSetSizes(*mat, m, n, M, N));
6902   PetscCall(MatSetType(*mat, MATMPIAIJ));
6903   maij = (Mat_MPIAIJ *)(*mat)->data;
6904 
6905   (*mat)->preallocated = PETSC_TRUE;
6906 
6907   PetscCall(PetscLayoutSetUp((*mat)->rmap));
6908   PetscCall(PetscLayoutSetUp((*mat)->cmap));
6909 
6910   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
6911   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));
6912 
6913   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
6914   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
6915   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
6916   PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
6917   PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
6918   PetscFunctionReturn(PETSC_SUCCESS);
6919 }
6920 
6921 typedef struct {
6922   Mat       *mp;    /* intermediate products */
6923   PetscBool *mptmp; /* is the intermediate product temporary? */
6924   PetscInt   cp;    /* number of intermediate products */
6925 
6926   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6927   PetscInt    *startsj_s, *startsj_r;
6928   PetscScalar *bufa;
6929   Mat          P_oth;
6930 
6931   /* may take advantage of merging product->B */
6932   Mat Bloc; /* B-local by merging diag and off-diag */
6933 
6934   /* cusparse does not support splitting the symbolic and numeric phases.
6935      When api_user is true, we do not need to update the numerical values
6936      of the temporary storage */
6937   PetscBool reusesym;
6938 
6939   /* support for COO values insertion */
6940   PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
6941   PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
6942   PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
6943   PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
6944   PetscSF      sf;            /* used for non-local values insertion and memory malloc */
6945   PetscMemType mtype;
6946 
6947   /* customization */
6948   PetscBool abmerge;
6949   PetscBool P_oth_bind;
6950 } MatMatMPIAIJBACKEND;
6951 
6952 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6953 {
6954   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6955   PetscInt             i;
6956 
6957   PetscFunctionBegin;
6958   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6959   PetscCall(PetscFree(mmdata->bufa));
6960   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6961   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6962   PetscCall(MatDestroy(&mmdata->P_oth));
6963   PetscCall(MatDestroy(&mmdata->Bloc));
6964   PetscCall(PetscSFDestroy(&mmdata->sf));
6965   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6966   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6967   PetscCall(PetscFree(mmdata->own[0]));
6968   PetscCall(PetscFree(mmdata->own));
6969   PetscCall(PetscFree(mmdata->off[0]));
6970   PetscCall(PetscFree(mmdata->off));
6971   PetscCall(PetscFree(mmdata));
6972   PetscFunctionReturn(PETSC_SUCCESS);
6973 }
6974 
6975 /* Copy selected n entries with indices in idx[] of A to v[].
6976    If idx is NULL, copy the whole data array of A to v[]
6977  */
6978 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6979 {
6980   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
6981 
6982   PetscFunctionBegin;
6983   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
6984   if (f) {
6985     PetscCall((*f)(A, n, idx, v));
6986   } else {
6987     const PetscScalar *vv;
6988 
6989     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
6990     if (n && idx) {
6991       PetscScalar    *w  = v;
6992       const PetscInt *oi = idx;
6993       PetscInt        j;
6994 
6995       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6996     } else {
6997       PetscCall(PetscArraycpy(v, vv, n));
6998     }
6999     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7000   }
7001   PetscFunctionReturn(PETSC_SUCCESS);
7002 }
7003 
7004 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
7005 {
7006   MatMatMPIAIJBACKEND *mmdata;
7007   PetscInt             i, n_d, n_o;
7008 
7009   PetscFunctionBegin;
7010   MatCheckProduct(C, 1);
7011   PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
7012   mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
7013   if (!mmdata->reusesym) { /* update temporary matrices */
7014     if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7015     if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
7016   }
7017   mmdata->reusesym = PETSC_FALSE;
7018 
7019   for (i = 0; i < mmdata->cp; i++) {
7020     PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
7021     PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
7022   }
7023   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
7024     PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];
7025 
7026     if (mmdata->mptmp[i]) continue;
7027     if (noff) {
7028       PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];
7029 
7030       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
7031       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
7032       n_o += noff;
7033       n_d += nown;
7034     } else {
7035       Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;
7036 
7037       PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
7038       n_d += mm->nz;
7039     }
7040   }
7041   if (mmdata->hasoffproc) { /* offprocess insertion */
7042     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7043     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
7044   }
7045   PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
7046   PetscFunctionReturn(PETSC_SUCCESS);
7047 }
7048 
7049 /* Support for Pt * A, A * P, or Pt * A * P */
7050 #define MAX_NUMBER_INTERMEDIATE 4
7051 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
7052 {
7053   Mat_Product           *product = C->product;
7054   Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
7055   Mat_MPIAIJ            *a, *p;
7056   MatMatMPIAIJBACKEND   *mmdata;
7057   ISLocalToGlobalMapping P_oth_l2g = NULL;
7058   IS                     glob      = NULL;
7059   const char            *prefix;
7060   char                   pprefix[256];
7061   const PetscInt        *globidx, *P_oth_idx;
7062   PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
7063   PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
7064   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
7065                                                                                          /* type-0: consecutive, start from 0; type-1: consecutive with */
7066                                                                                          /* a base offset; type-2: sparse with a local to global map table */
7067   const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */
7068 
7069   MatProductType ptype;
7070   PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
7071   PetscMPIInt    size;
7072 
7073   PetscFunctionBegin;
7074   MatCheckProduct(C, 1);
7075   PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
7076   ptype = product->type;
7077   if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
7078     ptype                                          = MATPRODUCT_AB;
7079     product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
7080   }
7081   switch (ptype) {
7082   case MATPRODUCT_AB:
7083     A          = product->A;
7084     P          = product->B;
7085     m          = A->rmap->n;
7086     n          = P->cmap->n;
7087     M          = A->rmap->N;
7088     N          = P->cmap->N;
7089     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
7090     break;
7091   case MATPRODUCT_AtB:
7092     P          = product->A;
7093     A          = product->B;
7094     m          = P->cmap->n;
7095     n          = A->cmap->n;
7096     M          = P->cmap->N;
7097     N          = A->cmap->N;
7098     hasoffproc = PETSC_TRUE;
7099     break;
7100   case MATPRODUCT_PtAP:
7101     A          = product->A;
7102     P          = product->B;
7103     m          = P->cmap->n;
7104     n          = P->cmap->n;
7105     M          = P->cmap->N;
7106     N          = P->cmap->N;
7107     hasoffproc = PETSC_TRUE;
7108     break;
7109   default:
7110     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7111   }
7112   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
7113   if (size == 1) hasoffproc = PETSC_FALSE;
7114 
7115   /* defaults */
7116   for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
7117     mp[i]    = NULL;
7118     mptmp[i] = PETSC_FALSE;
7119     rmapt[i] = -1;
7120     cmapt[i] = -1;
7121     rmapa[i] = NULL;
7122     cmapa[i] = NULL;
7123   }
7124 
7125   /* customization */
7126   PetscCall(PetscNew(&mmdata));
7127   mmdata->reusesym = product->api_user;
7128   if (ptype == MATPRODUCT_AB) {
7129     if (product->api_user) {
7130       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
7131       PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7132       PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7133       PetscOptionsEnd();
7134     } else {
7135       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
7136       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
7137       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7138       PetscOptionsEnd();
7139     }
7140   } else if (ptype == MATPRODUCT_PtAP) {
7141     if (product->api_user) {
7142       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
7143       PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7144       PetscOptionsEnd();
7145     } else {
7146       PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
7147       PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
7148       PetscOptionsEnd();
7149     }
7150   }
7151   a = (Mat_MPIAIJ *)A->data;
7152   p = (Mat_MPIAIJ *)P->data;
7153   PetscCall(MatSetSizes(C, m, n, M, N));
7154   PetscCall(PetscLayoutSetUp(C->rmap));
7155   PetscCall(PetscLayoutSetUp(C->cmap));
7156   PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
7157   PetscCall(MatGetOptionsPrefix(C, &prefix));
7158 
7159   cp = 0;
7160   switch (ptype) {
7161   case MATPRODUCT_AB: /* A * P */
7162     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7163 
7164     /* A_diag * P_local (merged or not) */
7165     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
7166       /* P is product->B */
7167       PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7168       PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7169       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7170       PetscCall(MatProductSetFill(mp[cp], product->fill));
7171       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7172       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7173       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7174       mp[cp]->product->api_user = product->api_user;
7175       PetscCall(MatProductSetFromOptions(mp[cp]));
7176       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7177       PetscCall(ISGetIndices(glob, &globidx));
7178       rmapt[cp] = 1;
7179       cmapt[cp] = 2;
7180       cmapa[cp] = globidx;
7181       mptmp[cp] = PETSC_FALSE;
7182       cp++;
7183     } else { /* A_diag * P_diag and A_diag * P_off */
7184       PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
7185       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7186       PetscCall(MatProductSetFill(mp[cp], product->fill));
7187       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7188       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7189       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7190       mp[cp]->product->api_user = product->api_user;
7191       PetscCall(MatProductSetFromOptions(mp[cp]));
7192       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7193       rmapt[cp] = 1;
7194       cmapt[cp] = 1;
7195       mptmp[cp] = PETSC_FALSE;
7196       cp++;
7197       PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
7198       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7199       PetscCall(MatProductSetFill(mp[cp], product->fill));
7200       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7201       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7202       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7203       mp[cp]->product->api_user = product->api_user;
7204       PetscCall(MatProductSetFromOptions(mp[cp]));
7205       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7206       rmapt[cp] = 1;
7207       cmapt[cp] = 2;
7208       cmapa[cp] = p->garray;
7209       mptmp[cp] = PETSC_FALSE;
7210       cp++;
7211     }
7212 
7213     /* A_off * P_other */
7214     if (mmdata->P_oth) {
7215       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
7216       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7217       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7218       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7219       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7220       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7221       PetscCall(MatProductSetFill(mp[cp], product->fill));
7222       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7223       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7224       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7225       mp[cp]->product->api_user = product->api_user;
7226       PetscCall(MatProductSetFromOptions(mp[cp]));
7227       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7228       rmapt[cp] = 1;
7229       cmapt[cp] = 2;
7230       cmapa[cp] = P_oth_idx;
7231       mptmp[cp] = PETSC_FALSE;
7232       cp++;
7233     }
7234     break;
7235 
7236   case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
7237     /* A is product->B */
7238     PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7239     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
7240       PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
7241       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7242       PetscCall(MatProductSetFill(mp[cp], product->fill));
7243       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7244       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7245       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7246       mp[cp]->product->api_user = product->api_user;
7247       PetscCall(MatProductSetFromOptions(mp[cp]));
7248       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7249       PetscCall(ISGetIndices(glob, &globidx));
7250       rmapt[cp] = 2;
7251       rmapa[cp] = globidx;
7252       cmapt[cp] = 2;
7253       cmapa[cp] = globidx;
7254       mptmp[cp] = PETSC_FALSE;
7255       cp++;
7256     } else {
7257       PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
7258       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7259       PetscCall(MatProductSetFill(mp[cp], product->fill));
7260       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7261       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7262       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7263       mp[cp]->product->api_user = product->api_user;
7264       PetscCall(MatProductSetFromOptions(mp[cp]));
7265       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7266       PetscCall(ISGetIndices(glob, &globidx));
7267       rmapt[cp] = 1;
7268       cmapt[cp] = 2;
7269       cmapa[cp] = globidx;
7270       mptmp[cp] = PETSC_FALSE;
7271       cp++;
7272       PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
7273       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7274       PetscCall(MatProductSetFill(mp[cp], product->fill));
7275       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7276       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7277       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7278       mp[cp]->product->api_user = product->api_user;
7279       PetscCall(MatProductSetFromOptions(mp[cp]));
7280       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7281       rmapt[cp] = 2;
7282       rmapa[cp] = p->garray;
7283       cmapt[cp] = 2;
7284       cmapa[cp] = globidx;
7285       mptmp[cp] = PETSC_FALSE;
7286       cp++;
7287     }
7288     break;
7289   case MATPRODUCT_PtAP:
7290     PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
7291     /* P is product->B */
7292     PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
7293     PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
7294     PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
7295     PetscCall(MatProductSetFill(mp[cp], product->fill));
7296     PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7297     PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7298     PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7299     mp[cp]->product->api_user = product->api_user;
7300     PetscCall(MatProductSetFromOptions(mp[cp]));
7301     PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7302     PetscCall(ISGetIndices(glob, &globidx));
7303     rmapt[cp] = 2;
7304     rmapa[cp] = globidx;
7305     cmapt[cp] = 2;
7306     cmapa[cp] = globidx;
7307     mptmp[cp] = PETSC_FALSE;
7308     cp++;
7309     if (mmdata->P_oth) {
7310       PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
7311       PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
7312       PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
7313       PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
7314       PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
7315       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
7316       PetscCall(MatProductSetFill(mp[cp], product->fill));
7317       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7318       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7319       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7320       mp[cp]->product->api_user = product->api_user;
7321       PetscCall(MatProductSetFromOptions(mp[cp]));
7322       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7323       mptmp[cp] = PETSC_TRUE;
7324       cp++;
7325       PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
7326       PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
7327       PetscCall(MatProductSetFill(mp[cp], product->fill));
7328       PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
7329       PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
7330       PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
7331       mp[cp]->product->api_user = product->api_user;
7332       PetscCall(MatProductSetFromOptions(mp[cp]));
7333       PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
7334       rmapt[cp] = 2;
7335       rmapa[cp] = globidx;
7336       cmapt[cp] = 2;
7337       cmapa[cp] = P_oth_idx;
7338       mptmp[cp] = PETSC_FALSE;
7339       cp++;
7340     }
7341     break;
7342   default:
7343     SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
7344   }
7345   /* sanity check */
7346   if (size > 1)
7347     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7348 
7349   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7350   for (i = 0; i < cp; i++) {
7351     mmdata->mp[i]    = mp[i];
7352     mmdata->mptmp[i] = mptmp[i];
7353   }
7354   mmdata->cp             = cp;
7355   C->product->data       = mmdata;
7356   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7357   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7358 
7359   /* memory type */
7360   mmdata->mtype = PETSC_MEMTYPE_HOST;
7361   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7362   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7363   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7364   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7365   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7366   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7367 
7368   /* prepare coo coordinates for values insertion */
7369 
7370   /* count total nonzeros of those intermediate seqaij Mats
7371     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7372     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7373     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7374   */
7375   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7376     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7377     if (mptmp[cp]) continue;
7378     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7379       const PetscInt *rmap = rmapa[cp];
7380       const PetscInt  mr   = mp[cp]->rmap->n;
7381       const PetscInt  rs   = C->rmap->rstart;
7382       const PetscInt  re   = C->rmap->rend;
7383       const PetscInt *ii   = mm->i;
7384       for (i = 0; i < mr; i++) {
7385         const PetscInt gr = rmap[i];
7386         const PetscInt nz = ii[i + 1] - ii[i];
7387         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7388         else ncoo_oown += nz;                  /* this row is local */
7389       }
7390     } else ncoo_d += mm->nz;
7391   }
7392 
7393   /*
7394     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7395 
7396     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7397 
7398     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7399 
7400     off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
7401     own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
7402     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7403 
7404     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7405     Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
7406   */
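  /* For illustration (hypothetical counts, assuming both products have sparse row maps and are not temporary):
     with cp = 2, mp[0] contributing 3 off-process nonzeros and mp[1] contributing 2, off[0] points to an index
     array of length 5 with off[1] = off[0] + 3 and off[2] = off[0] + 5; own[] is laid out analogously for the
     locally inserted nonzeros. */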
7407   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
7408   PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));
7409 
7410   /* gather (i,j) of nonzeros inserted by remote procs */
7411   if (hasoffproc) {
7412     PetscSF  msf;
7413     PetscInt ncoo2, *coo_i2, *coo_j2;
7414 
7415     PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
7416     PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
7417     PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */
7418 
7419     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
7420       Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
7421       PetscInt   *idxoff = mmdata->off[cp];
7422       PetscInt   *idxown = mmdata->own[cp];
7423       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
7424         const PetscInt *rmap = rmapa[cp];
7425         const PetscInt *cmap = cmapa[cp];
7426         const PetscInt *ii   = mm->i;
7427         PetscInt       *coi  = coo_i + ncoo_o;
7428         PetscInt       *coj  = coo_j + ncoo_o;
7429         const PetscInt  mr   = mp[cp]->rmap->n;
7430         const PetscInt  rs   = C->rmap->rstart;
7431         const PetscInt  re   = C->rmap->rend;
7432         const PetscInt  cs   = C->cmap->rstart;
7433         for (i = 0; i < mr; i++) {
7434           const PetscInt *jj = mm->j + ii[i];
7435           const PetscInt  gr = rmap[i];
7436           const PetscInt  nz = ii[i + 1] - ii[i];
7437           if (gr < rs || gr >= re) { /* this is an offproc row */
7438             for (j = ii[i]; j < ii[i + 1]; j++) {
7439               *coi++    = gr;
7440               *idxoff++ = j;
7441             }
7442             if (!cmapt[cp]) { /* already global */
7443               for (j = 0; j < nz; j++) *coj++ = jj[j];
7444             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7445               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7446             } else { /* offdiag */
7447               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7448             }
7449             ncoo_o += nz;
7450           } else { /* this is a local row */
7451             for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
7452           }
7453         }
7454       }
7455       mmdata->off[cp + 1] = idxoff;
7456       mmdata->own[cp + 1] = idxown;
7457     }
7458 
7459     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7460     PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
7461     PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
7462     PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
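    /* In the SF above, the leaves are this rank's offproc (i,j) entries (keyed by global row through C->rmap) and the
       roots are the locally owned rows of C. The multi-SF expands each root into one slot per incoming leaf, so its
       number of roots, ncoo2, is exactly the number of (i,j) pairs this rank will receive from other ranks. */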
7463     ncoo = ncoo_d + ncoo_oown + ncoo2;
7464     PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
7465     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
7466     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
7467     PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7468     PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
7469     PetscCall(PetscFree2(coo_i, coo_j));
7470     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
7471     PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
7472     coo_i = coo_i2;
7473     coo_j = coo_j2;
7474   } else { /* no offproc values insertion */
7475     ncoo = ncoo_d;
7476     PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));
7477 
7478     PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
7479     PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
7480     PetscCall(PetscSFSetUp(mmdata->sf));
7481   }
7482   mmdata->hasoffproc = hasoffproc;
7483 
7484   /* gather (i,j) of nonzeros inserted locally */
7485   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
7486     Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
7487     PetscInt       *coi  = coo_i + ncoo_d;
7488     PetscInt       *coj  = coo_j + ncoo_d;
7489     const PetscInt *jj   = mm->j;
7490     const PetscInt *ii   = mm->i;
7491     const PetscInt *cmap = cmapa[cp];
7492     const PetscInt *rmap = rmapa[cp];
7493     const PetscInt  mr   = mp[cp]->rmap->n;
7494     const PetscInt  rs   = C->rmap->rstart;
7495     const PetscInt  re   = C->rmap->rend;
7496     const PetscInt  cs   = C->cmap->rstart;
7497 
7498     if (mptmp[cp]) continue;
7499     if (rmapt[cp] == 1) { /* consecutive rows */
7500       /* fill coo_i */
7501       for (i = 0; i < mr; i++) {
7502         const PetscInt gr = i + rs;
7503         for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
7504       }
7505       /* fill coo_j */
7506       if (!cmapt[cp]) { /* type-0, already global */
7507         PetscCall(PetscArraycpy(coj, jj, mm->nz));
7508       } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
7509         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
7510       } else {                                            /* type-2, local to global for sparse columns */
7511         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
7512       }
7513       ncoo_d += mm->nz;
7514     } else if (rmapt[cp] == 2) { /* sparse rows */
7515       for (i = 0; i < mr; i++) {
7516         const PetscInt *jj = mm->j + ii[i];
7517         const PetscInt  gr = rmap[i];
7518         const PetscInt  nz = ii[i + 1] - ii[i];
7519         if (gr >= rs && gr < re) { /* local rows */
7520           for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
7521           if (!cmapt[cp]) { /* type-0, already global */
7522             for (j = 0; j < nz; j++) *coj++ = jj[j];
7523           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
7524             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
7525           } else { /* type-2, local to global for sparse columns */
7526             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
7527           }
7528           ncoo_d += nz;
7529         }
7530       }
7531     }
7532   }
7533   if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
7534   PetscCall(ISDestroy(&glob));
7535   if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
7536   PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
7537   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
7538   PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));
7539 
7540   /* preallocate with COO data */
7541   PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
7542   PetscCall(PetscFree2(coo_i, coo_j));
7543   PetscFunctionReturn(PETSC_SUCCESS);
7544 }
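/*
  Sketch (an assumption based on the generic COO API, not a verbatim copy of any routine in this file) of how the
  data prepared above is meant to be consumed at numeric time: once the values of each intermediate product have
  been packed into mmdata->coo_v (routing offproc values through mmdata->coo_w and mmdata->sf when hasoffproc),
  the product C is filled with

    PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
*/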
7545 
7546 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
7547 {
7548   Mat_Product *product = mat->product;
7549 #if defined(PETSC_HAVE_DEVICE)
7550   PetscBool match  = PETSC_FALSE;
7551   PetscBool usecpu = PETSC_FALSE;
7552 #else
7553   PetscBool match = PETSC_TRUE;
7554 #endif
7555 
7556   PetscFunctionBegin;
7557   MatCheckProduct(mat, 1);
7558 #if defined(PETSC_HAVE_DEVICE)
7559   if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
7560   if (match) { /* we can always fall back to the CPU if requested */
7561     switch (product->type) {
7562     case MATPRODUCT_AB:
7563       if (product->api_user) {
7564         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
7565         PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7566         PetscOptionsEnd();
7567       } else {
7568         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
7569         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
7570         PetscOptionsEnd();
7571       }
7572       break;
7573     case MATPRODUCT_AtB:
7574       if (product->api_user) {
7575         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
7576         PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7577         PetscOptionsEnd();
7578       } else {
7579         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
7580         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
7581         PetscOptionsEnd();
7582       }
7583       break;
7584     case MATPRODUCT_PtAP:
7585       if (product->api_user) {
7586         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
7587         PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7588         PetscOptionsEnd();
7589       } else {
7590         PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
7591         PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
7592         PetscOptionsEnd();
7593       }
7594       break;
7595     default:
7596       break;
7597     }
7598     match = (PetscBool)!usecpu;
7599   }
7600 #endif
7601   if (match) {
7602     switch (product->type) {
7603     case MATPRODUCT_AB:
7604     case MATPRODUCT_AtB:
7605     case MATPRODUCT_PtAP:
7606       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7607       break;
7608     default:
7609       break;
7610     }
7611   }
7612   /* fallback to MPIAIJ ops */
7613   if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
7614   PetscFunctionReturn(PETSC_SUCCESS);
7615 }
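/*
  Run-time usage sketch (hypothetical executable name; assumes a build with a device backend such as CUDA):
  the CPU fallback for a given product can be requested through the options registered above, e.g.

    ./app -mat_type aijcusparse -matmatmult_backend_cpu                MatMatMult() API
    ./app -mat_type aijcusparse -mat_product_algorithm_backend_cpu     MatProduct API
*/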
7616 
7617 /*
7618    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7619 
7620    n - the number of block indices in cc[]
7621    cc - the block indices (must be large enough to contain the indices)
7622 */
7623 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7624 {
7625   PetscInt        cnt = -1, nidx, j;
7626   const PetscInt *idx;
7627 
7628   PetscFunctionBegin;
7629   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7630   if (nidx) {
7631     cnt     = 0;
7632     cc[cnt] = idx[0] / bs;
7633     for (j = 1; j < nidx; j++) {
7634       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7635     }
7636   }
7637   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7638   *n = cnt + 1;
7639   PetscFunctionReturn(PETSC_SUCCESS);
7640 }
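/*
   Worked example (illustration only): for a row with (sorted) column indices {0, 1, 3, 4, 7} and bs = 2,
   MatCollapseRow() returns *n = 4 and cc = {0, 1, 2, 3}, since columns 0 and 1 fall in block 0, column 3 in
   block 1, column 4 in block 2, and column 7 in block 3.
*/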
7641 
7642 /*
7643     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7644 
7645     ncollapsed - the number of block indices
7646     collapsed - the block indices (must be large enough to contain the indices)
7647 */
7648 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
7649 {
7650   PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;
7651 
7652   PetscFunctionBegin;
7653   PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
7654   for (i = start + 1; i < start + bs; i++) {
7655     PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
7656     PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
7657     cprevtmp = cprev;
7658     cprev    = merged;
7659     merged   = cprevtmp;
7660   }
7661   *ncollapsed = nprev;
7662   if (collapsed) *collapsed = cprev;
7663   PetscFunctionReturn(PETSC_SUCCESS);
7664 }
7665 
7666 /*
7667    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7668 */
7669 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
7670 {
7671   PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
7672   Mat                tGmat;
7673   MPI_Comm           comm;
7674   const PetscScalar *vals;
7675   const PetscInt    *idx;
7676   PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
7677   MatScalar         *AA; // this is checked in graph
7678   PetscBool          isseqaij;
7679   Mat                a, b, c;
7680   MatType            jtype;
7681 
7682   PetscFunctionBegin;
7683   PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
7684   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
7685   PetscCall(MatGetType(Gmat, &jtype));
7686   PetscCall(MatCreate(comm, &tGmat));
7687   PetscCall(MatSetType(tGmat, jtype));
7688 
7689   /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
7690                Also, if the matrix is symmetric, can we skip this
7691                operation? It can be very expensive on large matrices. */
7692 
7693   // global sizes
7694   PetscCall(MatGetSize(Gmat, &MM, &NN));
7695   PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
7696   nloc = Iend - Istart;
7697   PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
7698   if (isseqaij) {
7699     a = Gmat;
7700     b = NULL;
7701   } else {
7702     Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7703     a             = d->A;
7704     b             = d->B;
7705     garray        = d->garray;
7706   }
7707   /* Determine upper bound on non-zeros needed in new filtered matrix */
7708   for (PetscInt row = 0; row < nloc; row++) {
7709     PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
7710     d_nnz[row] = ncols;
7711     if (ncols > maxcols) maxcols = ncols;
7712     PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
7713   }
7714   if (b) {
7715     for (PetscInt row = 0; row < nloc; row++) {
7716       PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
7717       o_nnz[row] = ncols;
7718       if (ncols > maxcols) maxcols = ncols;
7719       PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
7720     }
7721   }
7722   PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
7723   PetscCall(MatSetBlockSizes(tGmat, 1, 1));
7724   PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
7725   PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
7726   PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
7727   PetscCall(PetscFree2(d_nnz, o_nnz));
7728   //
7729   PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
7730   nnz0 = nnz1 = 0;
7731   for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7732     for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
7733       PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
7734       for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
7735         PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
7736         if (PetscRealPart(sv) > vfilter) {
7737           nnz1++;
7738           PetscInt cid = idx[jj] + Istart; //diag
7739           if (c != a) cid = garray[idx[jj]];
7740           AA[ncol_row] = vals[jj];
7741           AJ[ncol_row] = cid;
7742           ncol_row++;
7743         }
7744       }
7745       PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
7746       PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
7747     }
7748   }
7749   PetscCall(PetscFree2(AA, AJ));
7750   PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
7751   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7752   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7753 
7754   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7755 
7756   *filteredG = tGmat;
7757   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7758   PetscFunctionReturn(PETSC_SUCCESS);
7759 }
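/*
  Usage sketch (hypothetical caller): drop every graph entry whose absolute value is at or below 0.01

    Mat Gf;
    PetscCall(MatFilter_AIJ(G, 0.01, &Gf)); // G is an assembled (MPI)AIJ graph matrix
    PetscCall(MatDestroy(&G));
    G = Gf;
*/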
7760 
7761 /*
7762  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix
7763 
7764  Input Parameters:
7765  + Amat - matrix
7766  . symmetrize - make the result symmetric
7767  . scale - scale with diagonal
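 - filter - drop graph entries with absolute value at or below this threshold (a negative value disables filtering)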
7768 
7769  Output Parameter:
7770  . a_Gmat - output scalar graph >= 0
7771 
7772 */
7773 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7774 {
7775   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7776   MPI_Comm  comm;
7777   Mat       Gmat;
7778   PetscBool ismpiaij, isseqaij;
7779   Mat       a, b, c;
7780   MatType   jtype;
7781 
7782   PetscFunctionBegin;
7783   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7784   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7785   PetscCall(MatGetSize(Amat, &MM, &NN));
7786   PetscCall(MatGetBlockSize(Amat, &bs));
7787   nloc = (Iend - Istart) / bs;
7788 
7789   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7790   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7791   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7792 
7793   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7794   /* One solution is to provide a new API, MatAIJGetCollapsedAIJ(), so that each class can supply a fast
7795      implementation */
7796   if (bs > 1) {
7797     PetscCall(MatGetType(Amat, &jtype));
7798     PetscCall(MatCreate(comm, &Gmat));
7799     PetscCall(MatSetType(Gmat, jtype));
7800     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7801     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7802     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7803       PetscInt  *d_nnz, *o_nnz;
7804       MatScalar *aa, val, *AA;
7805       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7806       if (isseqaij) {
7807         a = Amat;
7808         b = NULL;
7809       } else {
7810         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7811         a             = d->A;
7812         b             = d->B;
7813       }
7814       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7815       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7816       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7817         PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
7818         const PetscInt *cols1, *cols2;
7819         for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
7820           PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
7821           nnz[brow / bs] = nc2 / bs;
7822           if (nc2 % bs) ok = 0;
7823           if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
7824           for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
7825             PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
7826             if (nc1 != nc2) ok = 0;
7827             else {
7828               for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
7829                 if (cols1[jj] != cols2[jj]) ok = 0;
7830                 if (cols1[jj] % bs != jj % bs) ok = 0;
7831               }
7832             }
7833             PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
7834           }
7835           PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
7836           if (!ok) {
7837             PetscCall(PetscFree2(d_nnz, o_nnz));
7838             PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
7839             goto old_bs;
7840           }
7841         }
7842       }
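      /* Reaching this point means every block row consists of fully populated bs x bs blocks (ok stayed 1), so each
         block can be collapsed below into a single graph entry holding the sum of the absolute values of its bs*bs
         coefficients. */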
7843       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7844       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7845       PetscCall(PetscFree2(d_nnz, o_nnz));
7846       PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
7847       // diag
7848       for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
7849         Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
7850         ai               = aseq->i;
7851         n                = ai[brow + 1] - ai[brow];
7852         aj               = aseq->j + ai[brow];
7853         for (int k = 0; k < n; k += bs) {        // block columns
7854           AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
7855           val        = 0;
7856           for (int ii = 0; ii < bs; ii++) { // rows in block
7857             aa = aseq->a + ai[brow + ii] + k;
7858             for (int jj = 0; jj < bs; jj++) {         // columns in block
7859               val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
7860             }
7861           }
7862           PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7863           AA[k / bs] = val;
7864         }
7865         grow = Istart / bs + brow / bs;
7866         PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
7867       }
7868       // off-diag
7869       if (ismpiaij) {
7870         Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
7871         const PetscScalar *vals;
7872         const PetscInt    *cols, *garray = aij->garray;
7873         PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
7874         for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
7875           PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
7876           for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
7877             PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
7878             AA[k / bs] = 0;
7879             AJ[cidx]   = garray[cols[k]] / bs;
7880           }
7881           nc = ncols / bs;
7882           PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
7883           for (int ii = 0; ii < bs; ii++) { // rows in block
7884             PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
7885             for (int k = 0; k < ncols; k += bs) {
7886               for (int jj = 0; jj < bs; jj++) { // cols in block
7887                 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax);
7888                 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
7889               }
7890             }
7891             PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
7892           }
7893           grow = Istart / bs + brow / bs;
7894           PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
7895         }
7896       }
7897       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7898       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7899       PetscCall(PetscFree2(AA, AJ));
7900     } else {
7901       const PetscScalar *vals;
7902       const PetscInt    *idx;
7903       PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
7904     old_bs:
7905       /*
7906        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
7907        */
7908       PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
7909       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7910       if (isseqaij) {
7911         PetscInt max_d_nnz;
7912         /*
7913          Determine exact preallocation count for (sequential) scalar matrix
7914          */
7915         PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
7916         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7917         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7918         for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7919         PetscCall(PetscFree3(w0, w1, w2));
7920       } else if (ismpiaij) {
7921         Mat             Daij, Oaij;
7922         const PetscInt *garray;
7923         PetscInt        max_d_nnz;
7924         PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
7925         /*
7926          Determine exact preallocation count for diagonal block portion of scalar matrix
7927          */
7928         PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
7929         max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
7930         PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
7931         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
7932         PetscCall(PetscFree3(w0, w1, w2));
7933         /*
7934      Overestimate (usually grossly so) the preallocation count for the off-diagonal portion of the scalar matrix
7935          */
7936         for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
7937           o_nnz[jj] = 0;
7938           for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
7939             PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7940             o_nnz[jj] += ncols;
7941             PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
7942           }
7943           if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
7944         }
7945       } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
7946       /* get scalar copy (norms) of matrix */
7947       PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
7948       PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
7949       PetscCall(PetscFree2(d_nnz, o_nnz));
7950       for (Ii = Istart; Ii < Iend; Ii++) {
7951         PetscInt dest_row = Ii / bs;
7952         PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
7953         for (jj = 0; jj < ncols; jj++) {
7954           PetscInt    dest_col = idx[jj] / bs;
7955           PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
7956           PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
7957         }
7958         PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
7959       }
7960       PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
7961       PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
7962     }
7963   } else {
7964     if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
7965     else {
7966       Gmat = Amat;
7967       PetscCall(PetscObjectReference((PetscObject)Gmat));
7968     }
7969     if (isseqaij) {
7970       a = Gmat;
7971       b = NULL;
7972     } else {
7973       Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
7974       a             = d->A;
7975       b             = d->B;
7976     }
7977     if (filter >= 0 || scale) {
7978       /* take absolute value of each entry */
7979       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7980         MatInfo      info;
7981         PetscScalar *avals;
7982         PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
7983         PetscCall(MatSeqAIJGetArray(c, &avals));
7984         for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
7985         PetscCall(MatSeqAIJRestoreArray(c, &avals));
7986       }
7987     }
7988   }
7989   if (symmetrize) {
7990     PetscBool isset, issym;
7991     PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
7992     if (!isset || !issym) {
7993       Mat matTrans;
7994       PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
7995       PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
7996       PetscCall(MatDestroy(&matTrans));
7997     }
7998     PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
7999   } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
8000   if (scale) {
8001     /* symmetrically scale Gmat so that all diagonal entries become 1 or -1 */
8002     Vec diag;
8003     PetscCall(MatCreateVecs(Gmat, &diag, NULL));
8004     PetscCall(MatGetDiagonal(Gmat, diag));
8005     PetscCall(VecReciprocal(diag));
8006     PetscCall(VecSqrtAbs(diag));
8007     PetscCall(MatDiagonalScale(Gmat, diag, diag));
8008     PetscCall(VecDestroy(&diag));
8009   }
8010   PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
8011 
8012   if (filter >= 0) {
8013     Mat Fmat = NULL; /* some silly compiler needs this */
8014 
8015     PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
8016     PetscCall(MatDestroy(&Gmat));
8017     Gmat = Fmat;
8018   }
8019   *a_Gmat = Gmat;
8020   PetscFunctionReturn(PETSC_SUCCESS);
8021 }
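/*
  Usage sketch (hypothetical caller inside the library, e.g. an aggregation-based preconditioner): build a
  symmetrized, diagonally scaled scalar graph from a blocked matrix A, dropping entries at or below 0.05

    Mat G;
    PetscCall(MatCreateGraph_Simple_AIJ(A, PETSC_TRUE, PETSC_TRUE, 0.05, &G));
    // ... use G ...
    PetscCall(MatDestroy(&G));
*/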
8022 
8023 /*
8024     Special version for direct calls from Fortran
8025 */
8026 #include <petsc/private/fortranimpl.h>
8027 
8028 /* Change these macros so they can be used in a void function */
8029 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8030 #undef PetscCall
8031 #define PetscCall(...) \
8032   do { \
8033     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8034     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8035       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8036       return; \
8037     } \
8038   } while (0)
8039 
8040 #undef SETERRQ
8041 #define SETERRQ(comm, ierr, ...) \
8042   do { \
8043     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8044     return; \
8045   } while (0)
8046 
8047 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8048   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8049 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8050   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8051 #else
8052 #endif
8053 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
8054 {
8055   Mat         mat = *mmat;
8056   PetscInt    m = *mm, n = *mn;
8057   InsertMode  addv = *maddv;
8058   Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
8059   PetscScalar value;
8060 
8061   MatCheckPreallocated(mat, 1);
8062   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
8063   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
8064   {
8065     PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
8066     PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
8067     PetscBool roworiented = aij->roworiented;
8068 
8069     /* Some variables required by the macros below */
8070     Mat         A     = aij->A;
8071     Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
8072     PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
8073     MatScalar  *aa;
8074     PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
8075     Mat         B                 = aij->B;
8076     Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
8077     PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
8078     MatScalar  *ba;
8079     /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
8080      * cannot use "#if defined" inside a macro. */
8081     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
8082 
8083     PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
8084     PetscInt   nonew = a->nonew;
8085     MatScalar *ap1, *ap2;
8086 
8087     PetscFunctionBegin;
8088     PetscCall(MatSeqAIJGetArray(A, &aa));
8089     PetscCall(MatSeqAIJGetArray(B, &ba));
8090     for (i = 0; i < m; i++) {
8091       if (im[i] < 0) continue;
8092       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
8093       if (im[i] >= rstart && im[i] < rend) {
8094         row      = im[i] - rstart;
8095         lastcol1 = -1;
8096         rp1      = aj + ai[row];
8097         ap1      = aa + ai[row];
8098         rmax1    = aimax[row];
8099         nrow1    = ailen[row];
8100         low1     = 0;
8101         high1    = nrow1;
8102         lastcol2 = -1;
8103         rp2      = bj + bi[row];
8104         ap2      = ba + bi[row];
8105         rmax2    = bimax[row];
8106         nrow2    = bilen[row];
8107         low2     = 0;
8108         high2    = nrow2;
8109 
8110         for (j = 0; j < n; j++) {
8111           if (roworiented) value = v[i * n + j];
8112           else value = v[i + j * m];
8113           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
8114           if (in[j] >= cstart && in[j] < cend) {
8115             col = in[j] - cstart;
8116             MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
8117           } else if (in[j] < 0) continue;
8118           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
8119             SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
8120           } else {
8121             if (mat->was_assembled) {
8122               if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
8123 #if defined(PETSC_USE_CTABLE)
8124               PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
8125               col--;
8126 #else
8127               col = aij->colmap[in[j]] - 1;
8128 #endif
8129               if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
8130                 PetscCall(MatDisAssemble_MPIAIJ(mat));
8131                 col = in[j];
8132                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
8133                 B        = aij->B;
8134                 b        = (Mat_SeqAIJ *)B->data;
8135                 bimax    = b->imax;
8136                 bi       = b->i;
8137                 bilen    = b->ilen;
8138                 bj       = b->j;
8139                 rp2      = bj + bi[row];
8140                 ap2      = ba + bi[row];
8141                 rmax2    = bimax[row];
8142                 nrow2    = bilen[row];
8143                 low2     = 0;
8144                 high2    = nrow2;
8145                 bm       = aij->B->rmap->n;
8146                 ba       = b->a;
8147                 inserted = PETSC_FALSE;
8148               }
8149             } else col = in[j];
8150             MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
8151           }
8152         }
8153       } else if (!aij->donotstash) {
8154         if (roworiented) {
8155           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8156         } else {
8157           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
8158         }
8159       }
8160     }
8161     PetscCall(MatSeqAIJRestoreArray(A, &aa));
8162     PetscCall(MatSeqAIJRestoreArray(B, &ba));
8163   }
8164   PetscFunctionReturnVoid();
8165 }
8166 
8167 /* Undefining these here since they were redefined from their original definition above! No
8168  * other PETSc functions should be defined past this point, as it is impossible to recover the
8169  * original definitions */
8170 #undef PetscCall
8171 #undef SETERRQ
8172