xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 65179781167c53baf4bbcd2ba89ddd86e7bb5b3d)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
10 {
11   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
12 
13   PetscFunctionBegin;
14 #if defined(PETSC_USE_LOG)
15   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
16 #endif
17   PetscCall(MatStashDestroy_Private(&mat->stash));
18   PetscCall(VecDestroy(&aij->diag));
19   PetscCall(MatDestroy(&aij->A));
20   PetscCall(MatDestroy(&aij->B));
21 #if defined(PETSC_USE_CTABLE)
22   PetscCall(PetscHMapIDestroy(&aij->colmap));
23 #else
24   PetscCall(PetscFree(aij->colmap));
25 #endif
26   PetscCall(PetscFree(aij->garray));
27   PetscCall(VecDestroy(&aij->lvec));
28   PetscCall(VecScatterDestroy(&aij->Mvctx));
29   PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
30   PetscCall(PetscFree(aij->ld));
31 
32   /* Free COO */
33   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
34 
35   PetscCall(PetscFree(mat->data));
36 
37   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
38   PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));
39 
40   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
41   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
42   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
43   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
44   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
45   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
46   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
47   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
48   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
49   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
50 #if defined(PETSC_HAVE_CUDA)
51   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
52 #endif
53 #if defined(PETSC_HAVE_HIP)
54   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
55 #endif
56 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
57   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
58 #endif
59   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
60 #if defined(PETSC_HAVE_ELEMENTAL)
61   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
62 #endif
63 #if defined(PETSC_HAVE_SCALAPACK)
64   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
65 #endif
66 #if defined(PETSC_HAVE_HYPRE)
67   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
68   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
69 #endif
70   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
71   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
72   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
73   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
74   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
75   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
76 #if defined(PETSC_HAVE_MKL_SPARSE)
77   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
78 #endif
79   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
80   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
81   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
82   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
83   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
84   PetscFunctionReturn(PETSC_SUCCESS);
85 }
86 
/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
88 #define TYPE AIJ
89 #define TYPE_AIJ
90 #include "../src/mat/impls/aij/mpi/mpihashmat.h"
91 #undef TYPE
92 #undef TYPE_AIJ
93 
94 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
95 {
96   Mat B;
97 
98   PetscFunctionBegin;
99   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
100   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
101   PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
102   PetscCall(MatDestroy(&B));
103   PetscFunctionReturn(PETSC_SUCCESS);
104 }
105 
106 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
107 {
108   Mat B;
109 
110   PetscFunctionBegin;
111   PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
112   PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
113   PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
114   PetscFunctionReturn(PETSC_SUCCESS);
115 }
116 
117 /*MC
118    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
119 
   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
121    and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
122   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
123   for communicators controlling multiple processes.  It is recommended that you call both of
124   the above preallocation routines for simplicity.
125 
126    Options Database Keys:
127 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`
128 
  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
  switches over to use inodes when enough exist.
132 
133   Level: beginner
134 
135 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
136 M*/
137 
138 /*MC
139    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
140 
141    This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
142    and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
143    `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
144   for communicators controlling multiple processes.  It is recommended that you call both of
145   the above preallocation routines for simplicity.
146 
147    Options Database Keys:
148 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`
149 
150   Level: beginner
151 
152 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
153 M*/
154 
155 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
156 {
157   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
158 
159   PetscFunctionBegin;
160 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
161   A->boundtocpu = flg;
162 #endif
163   if (a->A) PetscCall(MatBindToCPU(a->A, flg));
164   if (a->B) PetscCall(MatBindToCPU(a->B, flg));
165 
166   /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
167    * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
168    * to differ from the parent matrix. */
169   if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
170   if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
171 
172   PetscFunctionReturn(PETSC_SUCCESS);
173 }
174 
175 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
176 {
177   Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
178 
179   PetscFunctionBegin;
180   if (mat->A) {
181     PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
182     PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
183   }
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
/*
  Builds an index set (global numbering) of the locally owned rows that contain
  at least one nonzero value in either the diagonal (A) or off-diagonal (B)
  block.  If no process has a zero row, *keptrows is left NULL as a shortcut.
  Collective because of the MPIU_Allreduce().
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows; /* cnt = number of locally zero rows */
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First pass: count rows that are entirely zero, i.e. have no stored entries
     or whose stored values are all explicit zeros */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i]; /* number of stored entries of row i in A */
    nb = ib[i + 1] - ib[i]; /* number of stored entries of row i in B */
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1; /* found a nonzero: the row is kept */
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++; /* all stored values were explicit zeros */
  ok1:;
  }
  /* n0rows = global count of zero rows; if none exist anywhere, return with *keptrows == NULL */
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Second pass: collect the global indices of the locally nonzero rows; the
     first pass guarantees exactly m - cnt of them */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  /* The IS takes ownership of rows (PETSC_OWN_POINTER) */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}
255 
256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
257 {
258   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
259   PetscBool   cong;
260 
261   PetscFunctionBegin;
262   PetscCall(MatHasCongruentLayouts(Y, &cong));
263   if (Y->assembled && cong) {
264     PetscCall(MatDiagonalSet(aij->A, D, is));
265   } else {
266     PetscCall(MatDiagonalSet_Default(Y, D, is));
267   }
268   PetscFunctionReturn(PETSC_SUCCESS);
269 }
270 
271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
272 {
273   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
274   PetscInt    i, rstart, nrows, *rows;
275 
276   PetscFunctionBegin;
277   *zrows = NULL;
278   PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
279   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
280   for (i = 0; i < nrows; i++) rows[i] += rstart;
281   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
282   PetscFunctionReturn(PETSC_SUCCESS);
283 }
284 
/*
  Computes a per-column reduction of the matrix (norm, sum, or mean of the real
  or imaginary parts, selected by `type`) into `reductions`, which must hold n
  (global number of columns) PetscReal entries on every process.  Collective.
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray; /* garray maps local B column indices to global columns */
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one zero-initialized accumulator per global column */
  /* get/restore pairs with an unused pointer: presumably this forces any
     device-side values to be synced to the host before a_aij->a / b_aij->a are
     read directly below -- NOTE(review): confirm against MatSeqAIJGetArrayRead() semantics */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* Each branch walks every stored entry of A (local columns, offset by the
     column ownership start) and of B (mapped through garray) */
  if (type == NORM_2) {
    /* accumulate |a_ij|^2; the square root is taken after the reduction */
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* Combine the per-process partial results: max for the infinity norm, sum otherwise */
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
330 
331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
332 {
333   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
334   IS              sis, gis;
335   const PetscInt *isis, *igis;
336   PetscInt        n, *iis, nsis, ngis, rstart, i;
337 
338   PetscFunctionBegin;
339   PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
340   PetscCall(MatFindNonzeroRows(a->B, &gis));
341   PetscCall(ISGetSize(gis, &ngis));
342   PetscCall(ISGetSize(sis, &nsis));
343   PetscCall(ISGetIndices(sis, &isis));
344   PetscCall(ISGetIndices(gis, &igis));
345 
346   PetscCall(PetscMalloc1(ngis + nsis, &iis));
347   PetscCall(PetscArraycpy(iis, igis, ngis));
348   PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
349   n = ngis + nsis;
350   PetscCall(PetscSortRemoveDupsInt(&n, iis));
351   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
352   for (i = 0; i < n; i++) iis[i] += rstart;
353   PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));
354 
355   PetscCall(ISRestoreIndices(sis, &isis));
356   PetscCall(ISRestoreIndices(gis, &igis));
357   PetscCall(ISDestroy(&sis));
358   PetscCall(ISDestroy(&gis));
359   PetscFunctionReturn(PETSC_SUCCESS);
360 }
361 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order-N integer array, but access is fast).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i; /* number of off-diagonal columns actually present locally */

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* Hash-table colmap: maps (global col + 1) -> (local col + 1); the +1 shift
     lets a lookup result of 0 mean "column not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* Dense colmap: one entry per global column, zero-initialized so that 0
     means "column not present"; stores local col + 1 */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
385 
/*
  MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) -

  Inserts (INSERT_VALUES) or adds (ADD_VALUES) a single `value` at local
  (row, col) of the diagonal block A.  Relies on caller-scope state set up in
  MatSetValues_MPIAIJ(): rp1/ap1 (column-index/value arrays of the row), nrow1,
  rmax1, and the search window low1/high1/lastcol1.  If the entry does not yet
  exist and `nonew` permits insertion, the row is grown via
  MatSeqXAIJReallocateAIJ() and later entries are shifted up.  orow/ocol are
  the original global indices, used only for error reporting.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    /* narrow the binary-search window using the previously inserted column */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }
431 
/*
  MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) -

  Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B,
  using the parallel caller-scope state rp2/ap2/nrow2/rmax2/low2/high2/lastcol2.
  Note: unlike the A variant, a zero value is dropped (with ignorezeroentries)
  regardless of row == col, since B holds no diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }
476 
/*
  Replaces the stored values of global row `row`, where `v` supplies the values
  for the ENTIRE stored row ordered by global column: first the off-diagonal
  (B) entries left of the diagonal block, then the diagonal block (A) entries,
  then the remaining B entries.  As the comment below states, this only works
  for square matrices (row and column ownership ranges coincide).
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray                         = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* garray gives the global column of each B entry */
  }
  /* copy the first l values of v into B's entries left of the diagonal block */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
514 
/*
  MatSetValues() implementation for MPIAIJ: entries in locally owned rows are
  inserted directly into the diagonal (A) or off-diagonal (B) sequential block
  via the MatSetValues_SeqAIJ_{A,B}_Private() macros; entries for off-process
  rows are stashed and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  /* binary-search state shared with the MatSetValues_SeqAIJ_{A,B}_Private() macros */
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* row is owned by this process */
      row      = im[i] - rstart;
      /* initialize the macro search window for this row in A ... */
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      /* ... and in B */
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        /* optionally drop explicit off-diagonal zeros when adding */
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column lies in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative column indices are ignored by convention */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* after assembly B stores compacted local column indices; translate the global column through colmap */
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new nonzero location while insertion is disallowed: skip with info (nonew == 1) or error */
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B uses global column indices directly */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash the values for communication at assembly time */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
623 
624 /*
625     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
626     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
628 */
629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
630 {
631   Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
632   Mat         A      = aij->A; /* diagonal part of the matrix */
633   Mat         B      = aij->B; /* offdiagonal part of the matrix */
634   Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
635   Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
636   PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
637   PetscInt   *ailen = a->ilen, *aj = a->j;
638   PetscInt   *bilen = b->ilen, *bj = b->j;
639   PetscInt    am          = aij->A->rmap->n, j;
640   PetscInt    diag_so_far = 0, dnz;
641   PetscInt    offd_so_far = 0, onz;
642 
643   PetscFunctionBegin;
644   /* Iterate over all rows of the matrix */
645   for (j = 0; j < am; j++) {
646     dnz = onz = 0;
647     /*  Iterate over all non-zero columns of the current row */
648     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
649       /* If column is in the diagonal */
650       if (mat_j[col] >= cstart && mat_j[col] < cend) {
651         aj[diag_so_far++] = mat_j[col] - cstart;
652         dnz++;
653       } else { /* off-diagonal entries */
654         bj[offd_so_far++] = mat_j[col];
655         onz++;
656       }
657     }
658     ailen[j] = dnz;
659     bilen[j] = onz;
660   }
661   PetscFunctionReturn(PETSC_SUCCESS);
662 }
663 
664 /*
665     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
666     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
667     No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
668     Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
669     would not be true and the more complex MatSetValues_MPIAIJ has to be used.
670 */
671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
672 {
673   Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
674   Mat          A    = aij->A; /* diagonal part of the matrix */
675   Mat          B    = aij->B; /* offdiagonal part of the matrix */
676   Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
677   Mat_SeqAIJ  *a      = (Mat_SeqAIJ *)A->data;
678   Mat_SeqAIJ  *b      = (Mat_SeqAIJ *)B->data;
679   PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
680   PetscInt    *ailen = a->ilen, *aj = a->j;
681   PetscInt    *bilen = b->ilen, *bj = b->j;
682   PetscInt     am          = aij->A->rmap->n, j;
683   PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
684   PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
685   PetscScalar *aa = a->a, *ba = b->a;
686 
687   PetscFunctionBegin;
688   /* Iterate over all rows of the matrix */
689   for (j = 0; j < am; j++) {
690     dnz_row = onz_row = 0;
691     rowstart_offd     = full_offd_i[j];
692     rowstart_diag     = full_diag_i[j];
693     /*  Iterate over all non-zero columns of the current row */
694     for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
695       /* If column is in the diagonal */
696       if (mat_j[col] >= cstart && mat_j[col] < cend) {
697         aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
698         aa[rowstart_diag + dnz_row] = mat_a[col];
699         dnz_row++;
700       } else { /* off-diagonal entries */
701         bj[rowstart_offd + onz_row] = mat_j[col];
702         ba[rowstart_offd + onz_row] = mat_a[col];
703         onz_row++;
704       }
705     }
706     ailen[j] = dnz_row;
707     bilen[j] = onz_row;
708   }
709   PetscFunctionReturn(PETSC_SUCCESS);
710 }
711 
/*
  MatGetValues for MPIAIJ: fills v[i*n + j] = mat(idxm[i], idxn[j]).

  Only locally owned rows are supported; requesting an off-process row raises
  PETSC_ERR_SUP. Negative row or column indices are skipped (their slots in v
  are left untouched). Columns inside the local diagonal block are read from
  aij->A; all other columns are translated through the column map and read from
  the off-diagonal block aij->B, yielding 0.0 when the column is not among this
  rank's off-diagonal columns.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row: skip */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* local row index */
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column: skip */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column falls in the local diagonal block */
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal column: build the global-to-local column map on first use */
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          /* the map stores col+1 so that the default 0 can mean "absent" */
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not present in this rank's off-diagonal part -> value is 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
746 
747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
748 {
749   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
750   PetscInt    nstash, reallocs;
751 
752   PetscFunctionBegin;
753   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
754 
755   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
756   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
757   PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
758   PetscFunctionReturn(PETSC_SUCCESS);
759 }
760 
/*
  Completes assembly of an MPIAIJ matrix: drains the off-process stash into the
  local blocks, assembles the diagonal (A) and off-diagonal (B) SeqAIJ blocks,
  coordinates global disassembly/reassembly, and refreshes cached state
  (multiply machinery, nonzero state, device offload masks).
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive every message of stashed entries destined for this rank */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* LAND of was_assembled: false on any rank means someone disassembled */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the scatter/ghost machinery for MatMult */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached MatGetRow work arrays are invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
839 
840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
841 {
842   Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
843 
844   PetscFunctionBegin;
845   PetscCall(MatZeroEntries(l->A));
846   PetscCall(MatZeroEntries(l->B));
847   PetscFunctionReturn(PETSC_SUCCESS);
848 }
849 
/*
  Zeroes the rows of A given by the global indices rows[], optionally placing
  diag on the diagonal of each zeroed row; with x and b supplied (congruent
  row/column layouts required), b is also updated so that b_i = diag * x_i for
  every zeroed local row i.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB; /* nonzero states of the two blocks before zeroing */
  PetscInt        *lrows;  /* local indices of the owned rows to zero */
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: every diagonal entry lives in the diagonal block, so delegate */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' settings, restored below */
    PetscBool   nnzA, nnzB; /* keepnonzeropattern flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily allow insertion of new nonzeros */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists beyond the last column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++; /* bump only if some rank's pattern actually changed */
  PetscFunctionReturn(PETSC_SUCCESS);
}
923 
/*
  Zeroes both the rows AND columns of A given by the global indices rows[],
  optionally placing diag on the diagonal of the zeroed rows. With x and b
  supplied (congruent layouts only), b is updated to account for the eliminated
  columns: b_i -= a_ij * x_j for each zeroed column j hit by an off-diagonal
  entry.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* NOTE(review): PetscInt narrowed to PetscMPIInt; also reused as a column count below */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data; /* off-diagonal block */
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* mark the zeroed local rows with 1 and scatter so each rank learns which of its ghost columns are zeroed */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* gather the ghost values of x needed to correct b below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    /* compressed-row storage: iterate only rows that have nonzeros, with rindex giving the true row */
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* number of nonzeros in this (compressed) row */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this ghost column was zeroed somewhere */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1041 
1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
1043 {
1044   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1045   PetscInt    nt;
1046   VecScatter  Mvctx = a->Mvctx;
1047 
1048   PetscFunctionBegin;
1049   PetscCall(VecGetLocalSize(xx, &nt));
1050   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
1051   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1052   PetscUseTypeMethod(a->A, mult, xx, yy);
1053   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1054   PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
1055   PetscFunctionReturn(PETSC_SUCCESS);
1056 }
1057 
1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
1059 {
1060   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1061 
1062   PetscFunctionBegin;
1063   PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
1064   PetscFunctionReturn(PETSC_SUCCESS);
1065 }
1066 
1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1068 {
1069   Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
1070   VecScatter  Mvctx = a->Mvctx;
1071 
1072   PetscFunctionBegin;
1073   PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1074   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
1075   PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
1076   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
1077   PetscFunctionReturn(PETSC_SUCCESS);
1078 }
1079 
1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
1081 {
1082   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1083 
1084   PetscFunctionBegin;
1085   /* do nondiagonal part */
1086   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1087   /* do local part */
1088   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1089   /* add partial results together */
1090   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1091   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
1092   PetscFunctionReturn(PETSC_SUCCESS);
1093 }
1094 
/*
  Tests whether Bmat equals the transpose of Amat within tolerance tol,
  returning the (globally reduced) result in f. First compares the diagonal
  blocks (cheap, purely local test plus one allreduce); only if those match
  does it gather the off-diagonal parts via MatCreateSubMatrices and compare
  them (expensive).
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij  = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme; /* owned rows vs all other rows */
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); /* sequential: diagonal block is the whole matrix */

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* NOTE(review): notme is sized N - last + first but the second loop writes
     indices up to M - last + first - 1; this is only safe when M == N (square
     matrix) — confirm callers guarantee squareness. */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* A(Me, Notme) must equal B(Notme, Me)^T for the matrices to be transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1136 
1137 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
1138 {
1139   PetscFunctionBegin;
1140   PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
1141   PetscFunctionReturn(PETSC_SUCCESS);
1142 }
1143 
1144 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1145 {
1146   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1147 
1148   PetscFunctionBegin;
1149   /* do nondiagonal part */
1150   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1151   /* do local part */
1152   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1153   /* add partial results together */
1154   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1155   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
1156   PetscFunctionReturn(PETSC_SUCCESS);
1157 }
1158 
1159 /*
1160   This only works correctly for square matrices where the subblock A->A is the
1161    diagonal block
1162 */
1163 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
1164 {
1165   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1166 
1167   PetscFunctionBegin;
1168   PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
1169   PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
1170   PetscCall(MatGetDiagonal(a->A, v));
1171   PetscFunctionReturn(PETSC_SUCCESS);
1172 }
1173 
1174 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
1175 {
1176   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1177 
1178   PetscFunctionBegin;
1179   PetscCall(MatScale(a->A, aa));
1180   PetscCall(MatScale(a->B, aa));
1181   PetscFunctionReturn(PETSC_SUCCESS);
1182 }
1183 
1184 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
1185 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
1186 {
1187   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1188 
1189   PetscFunctionBegin;
1190   PetscCall(PetscSFDestroy(&aij->coo_sf));
1191   PetscCall(PetscFree(aij->Aperm1));
1192   PetscCall(PetscFree(aij->Bperm1));
1193   PetscCall(PetscFree(aij->Ajmap1));
1194   PetscCall(PetscFree(aij->Bjmap1));
1195 
1196   PetscCall(PetscFree(aij->Aimap2));
1197   PetscCall(PetscFree(aij->Bimap2));
1198   PetscCall(PetscFree(aij->Aperm2));
1199   PetscCall(PetscFree(aij->Bperm2));
1200   PetscCall(PetscFree(aij->Ajmap2));
1201   PetscCall(PetscFree(aij->Bjmap2));
1202 
1203   PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
1204   PetscCall(PetscFree(aij->Cperm1));
1205   PetscFunctionReturn(PETSC_SUCCESS);
1206 }
1207 
1208 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
1209 {
1210   Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
1211   Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
1212   Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
1213   const PetscInt    *garray = aij->garray;
1214   const PetscScalar *aa, *ba;
1215   PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
1216   PetscInt          *rowlens;
1217   PetscInt          *colidxs;
1218   PetscScalar       *matvals;
1219 
1220   PetscFunctionBegin;
1221   PetscCall(PetscViewerSetUp(viewer));
1222 
1223   M  = mat->rmap->N;
1224   N  = mat->cmap->N;
1225   m  = mat->rmap->n;
1226   rs = mat->rmap->rstart;
1227   cs = mat->cmap->rstart;
1228   nz = A->nz + B->nz;
1229 
1230   /* write matrix header */
1231   header[0] = MAT_FILE_CLASSID;
1232   header[1] = M;
1233   header[2] = N;
1234   header[3] = nz;
1235   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
1236   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1237 
1238   /* fill in and store row lengths  */
1239   PetscCall(PetscMalloc1(m, &rowlens));
1240   for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
1241   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
1242   PetscCall(PetscFree(rowlens));
1243 
1244   /* fill in and store column indices */
1245   PetscCall(PetscMalloc1(nz, &colidxs));
1246   for (cnt = 0, i = 0; i < m; i++) {
1247     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1248       if (garray[B->j[jb]] > cs) break;
1249       colidxs[cnt++] = garray[B->j[jb]];
1250     }
1251     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
1252     for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
1253   }
1254   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1255   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
1256   PetscCall(PetscFree(colidxs));
1257 
1258   /* fill in and store nonzero values */
1259   PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
1260   PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
1261   PetscCall(PetscMalloc1(nz, &matvals));
1262   for (cnt = 0, i = 0; i < m; i++) {
1263     for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1264       if (garray[B->j[jb]] > cs) break;
1265       matvals[cnt++] = ba[jb];
1266     }
1267     for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
1268     for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
1269   }
1270   PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
1271   PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
1272   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
1273   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
1274   PetscCall(PetscFree(matvals));
1275 
1276   /* write block size option to the viewer's .info file */
1277   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1278   PetscFunctionReturn(PETSC_SUCCESS);
1279 }
1280 
1281 #include <petscdraw.h>
/*
  Viewer workhorse for ASCII, draw, socket, and binary viewers. Special ASCII
  formats (load balance, info, info-detail, factor info) and the binary case
  are handled per-format and return early; every remaining case gathers the
  whole matrix onto rank 0 via MatCreateSubMatrix and views it there
  sequentially.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather per-rank nonzero counts and report min/avg/max */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local sizes, nonzeros, memory, inode usage, and the Mvctx scatter */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocess: the diagonal block is the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): iascii was already taken by the first branch of this chain,
       so this branch appears unreachable — confirm before relying on it */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests every row/column, all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /*  The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
*/
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1407 
1408 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
1409 {
1410   PetscBool iascii, isdraw, issocket, isbinary;
1411 
1412   PetscFunctionBegin;
1413   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1414   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1415   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1416   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1417   if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
1418   PetscFunctionReturn(PETSC_SUCCESS);
1419 }
1420 
/*
  MatSOR_MPIAIJ - SOR/Gauss-Seidel relaxation for MPIAIJ matrices.

  Supported flags are SOR_APPLY_UPPER, the SOR_LOCAL_* sweep variants, and
  SOR_EISENSTAT; a true parallel SOR is not implemented and is rejected with
  PETSC_ERR_SUP.

  For the local sweeps each outer iteration
    1. scatters the current solution xx into the ghost vector mat->lvec,
    2. forms bb1 = bb - B*xx (subtracting the off-diagonal-block contribution), and
    3. applies the sequential SOR kernel of the diagonal block A to bb1.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector holding bb minus the off-diagonal contribution */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* Purely local application of the diagonal block's upper part; no communication. */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed when more than one outer iteration runs, when the initial
     guess is nonzero ((~flag) & SOR_ZERO_INITIAL_GUESS), or for Eisenstat. */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* First sweep can use bb directly since x starts at zero (B*x == 0). */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    /* Backward half-sweep on the diagonal block with zero initial guess. */
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* Lazily build and cache the diagonal of the full matrix on first use. */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    /* Prefer the matrix type's own diagonal-block multiply if available. */
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    /* bb1 = bb + ((omega-2)/omega) * D*xx  (Eisenstat's rhs modification) */
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    /* Combine the forward and backward half-sweep results. */
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* Propagate any factorization error detected by the sequential kernel. */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1517 
/*
  MatPermute_MPIAIJ - form B = P_r * A * P_c for permutations given by rowp/colp.

  Strategy: invert the row and column permutations with PetscSF reductions to
  learn each local row/column's destination, broadcast the destinations of the
  ghost (garray) columns, count diagonal/off-diagonal nonzeros per destination
  row for preallocation, then insert the permuted entries with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never set in this function; the ISDestroy(&colp) guarded by it below is dead code here — presumably a leftover from a variant that replaced colp with a parallel IS; confirm */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal (dnnz) and off-diagonal (onnz) entries each of my rows will
     contribute at its destination, then push the counts to the owning ranks. */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}
1623 
1624 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
1625 {
1626   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1627 
1628   PetscFunctionBegin;
1629   PetscCall(MatGetSize(aij->B, NULL, nghosts));
1630   if (ghosts) *ghosts = aij->garray;
1631   PetscFunctionReturn(PETSC_SUCCESS);
1632 }
1633 
1634 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1635 {
1636   Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
1637   Mat            A = mat->A, B = mat->B;
1638   PetscLogDouble isend[5], irecv[5];
1639 
1640   PetscFunctionBegin;
1641   info->block_size = 1.0;
1642   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1643 
1644   isend[0] = info->nz_used;
1645   isend[1] = info->nz_allocated;
1646   isend[2] = info->nz_unneeded;
1647   isend[3] = info->memory;
1648   isend[4] = info->mallocs;
1649 
1650   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1651 
1652   isend[0] += info->nz_used;
1653   isend[1] += info->nz_allocated;
1654   isend[2] += info->nz_unneeded;
1655   isend[3] += info->memory;
1656   isend[4] += info->mallocs;
1657   if (flag == MAT_LOCAL) {
1658     info->nz_used      = isend[0];
1659     info->nz_allocated = isend[1];
1660     info->nz_unneeded  = isend[2];
1661     info->memory       = isend[3];
1662     info->mallocs      = isend[4];
1663   } else if (flag == MAT_GLOBAL_MAX) {
1664     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else if (flag == MAT_GLOBAL_SUM) {
1672     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1673 
1674     info->nz_used      = irecv[0];
1675     info->nz_allocated = irecv[1];
1676     info->nz_unneeded  = irecv[2];
1677     info->memory       = irecv[3];
1678     info->mallocs      = irecv[4];
1679   }
1680   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1681   info->fill_ratio_needed = 0;
1682   info->factor_mallocs    = 0;
1683   PetscFunctionReturn(PETSC_SUCCESS);
1684 }
1685 
/*
  MatSetOption_MPIAIJ - set an option on an MPIAIJ matrix.

  Options affecting the nonzero structure are forwarded to both sequential
  blocks; purely parallel options (stashing) are recorded locally; symmetry
  flags are handled by the generic MatSetOption() and ignored here.
*/
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* Options forwarded verbatim to both the diagonal and off-diagonal blocks. */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    /* Remember orientation for MatSetValues on the parallel layer too. */
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* Skip the stash entirely: off-process entries will be dropped. */
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1739 
/*
  MatGetRow_MPIAIJ - return one locally-owned row with global column indices.

  The row is assembled by merging the diagonal-block (A) and off-diagonal-block
  (B) rows. Since both are sorted by column within their own numbering, the
  merged row is: B entries whose global column is < cstart, then all A entries
  (shifted by cstart), then the remaining B entries. Results are returned in
  the cached mat->rowvalues / mat->rowindices buffers, which stay valid until
  MatRestoreRow_MPIAIJ() is called.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* Only one row may be "checked out" at a time. */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* Pass NULL to the sequential getrow for outputs the caller did not request;
     column indices of B are still needed for the merge when values are wanted. */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray; /* local B column -> global column */
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined during the value pass */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
1823 
1824 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1825 {
1826   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1827 
1828   PetscFunctionBegin;
1829   PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1830   aij->getrowactive = PETSC_FALSE;
1831   PetscFunctionReturn(PETSC_SUCCESS);
1832 }
1833 
/*
  MatNorm_MPIAIJ - compute a matrix norm of an MPIAIJ matrix.

  Supports NORM_FROBENIUS, NORM_1 (max column sum), and NORM_INFINITY
  (max row sum); NORM_2 is rejected. On one process the computation is
  delegated to the sequential diagonal block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* Sum |a_ij|^2 over both blocks, reduce, then take the square root. */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      /* Accumulate per-global-column absolute sums (A columns shifted by
         cstart, B columns mapped through garray), reduce, then take the max. */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* Rows are fully local, so take the local max row sum and reduce. */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
1911 
/*
  MatTranspose_MPIAIJ - form the transpose of an MPIAIJ matrix.

  For MAT_INITIAL_MATRIX (or in-place reuse with *matout == A) the result is
  preallocated by counting column occurrences of the diagonal block locally
  and reducing the off-diagonal counts to the owning ranks via a PetscSF.
  The diagonal block is then transposed locally (all writes are on-process),
  while off-diagonal entries are inserted with MatSetValues(): each local row
  of B becomes a column of the result, so stashing/assembly moves them to the
  correct ranks. For MAT_INPLACE_MATRIX the new matrix replaces A via
  MatHeaderMerge().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a    = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    /* The transpose has swapped sizes and block sizes relative to A. */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    /* Reusing an existing transpose: any new allocation indicates a bug. */
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* Translate compressed local columns of B to global indices up front. */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* Insert row `row` of B as a column of the transpose (note swapped row/col args). */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* In-place transpose: replace A's contents with B and dispose of B's shell. */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2005 
2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
2007 {
2008   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2009   Mat         a = aij->A, b = aij->B;
2010   PetscInt    s1, s2, s3;
2011 
2012   PetscFunctionBegin;
2013   PetscCall(MatGetLocalSize(mat, &s2, &s3));
2014   if (rr) {
2015     PetscCall(VecGetLocalSize(rr, &s1));
2016     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
2017     /* Overlap communication with computation. */
2018     PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2019   }
2020   if (ll) {
2021     PetscCall(VecGetLocalSize(ll, &s1));
2022     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
2023     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
2024   }
2025   /* scale  the diagonal block */
2026   PetscUseTypeMethod(a, diagonalscale, ll, rr);
2027 
2028   if (rr) {
2029     /* Do a scatter end and then right scale the off-diagonal block */
2030     PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
2031     PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
2032   }
2033   PetscFunctionReturn(PETSC_SUCCESS);
2034 }
2035 
2036 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2037 {
2038   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2039 
2040   PetscFunctionBegin;
2041   PetscCall(MatSetUnfactored(a->A));
2042   PetscFunctionReturn(PETSC_SUCCESS);
2043 }
2044 
2045 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
2046 {
2047   Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
2048   Mat         a, b, c, d;
2049   PetscBool   flg;
2050 
2051   PetscFunctionBegin;
2052   a = matA->A;
2053   b = matA->B;
2054   c = matB->A;
2055   d = matB->B;
2056 
2057   PetscCall(MatEqual(a, c, &flg));
2058   if (flg) PetscCall(MatEqual(b, d, &flg));
2059   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
2060   PetscFunctionReturn(PETSC_SUCCESS);
2061 }
2062 
2063 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
2064 {
2065   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2066   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
2067 
2068   PetscFunctionBegin;
2069   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2070   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2071     /* because of the column compression in the off-processor part of the matrix a->B,
2072        the number of columns in a->B and b->B may be different, hence we cannot call
2073        the MatCopy() directly on the two parts. If need be, we can provide a more
2074        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2075        then copying the submatrices */
2076     PetscCall(MatCopy_Basic(A, B, str));
2077   } else {
2078     PetscCall(MatCopy(a->A, b->A, str));
2079     PetscCall(MatCopy(a->B, b->B, str));
2080   }
2081   PetscCall(PetscObjectStateIncrease((PetscObject)B));
2082   PetscFunctionReturn(PETSC_SUCCESS);
2083 }
2084 
2085 /*
2086    Computes the number of nonzeros per row needed for preallocation when X and Y
2087    have different nonzero structure.
2088 */
2089 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
2090 {
2091   PetscInt i, j, k, nzx, nzy;
2092 
2093   PetscFunctionBegin;
2094   /* Set the number of nonzeros in the new matrix */
2095   for (i = 0; i < m; i++) {
2096     const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
2097     nzx    = xi[i + 1] - xi[i];
2098     nzy    = yi[i + 1] - yi[i];
2099     nnz[i] = 0;
2100     for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
2101       for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2102       if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
2103       nnz[i]++;
2104     }
2105     for (; k < nzy; k++) nnz[i]++;
2106   }
2107   PetscFunctionReturn(PETSC_SUCCESS);
2108 }
2109 
2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
2112 {
2113   PetscInt    m = Y->rmap->N;
2114   Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
2115   Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;
2116 
2117   PetscFunctionBegin;
2118   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
2119   PetscFunctionReturn(PETSC_SUCCESS);
2120 }
2121 
/*
  MatAXPY_MPIAIJ - compute Y = a*X + Y.

  With SAME_NONZERO_PATTERN the blockwise AXPY is used directly; with
  SUBSET_NONZERO_PATTERN the generic basic implementation suffices. Otherwise
  a new matrix with the union nonzero pattern is preallocated, filled, and
  swapped into Y via MatHeaderMerge().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o; /* per-row counts for the diagonal / off-diagonal blocks of the union pattern */

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    /* Diagonal blocks share the same column numbering; off-diagonal blocks need their garray maps. */
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* Replace Y's contents with B; B's shell is destroyed by the merge. */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
2152 
2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2154 
2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2156 {
2157   PetscFunctionBegin;
2158   if (PetscDefined(USE_COMPLEX)) {
2159     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2160 
2161     PetscCall(MatConjugate_SeqAIJ(aij->A));
2162     PetscCall(MatConjugate_SeqAIJ(aij->B));
2163   }
2164   PetscFunctionReturn(PETSC_SUCCESS);
2165 }
2166 
2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2168 {
2169   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2170 
2171   PetscFunctionBegin;
2172   PetscCall(MatRealPart(a->A));
2173   PetscCall(MatRealPart(a->B));
2174   PetscFunctionReturn(PETSC_SUCCESS);
2175 }
2176 
2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2178 {
2179   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2180 
2181   PetscFunctionBegin;
2182   PetscCall(MatImaginaryPart(a->A));
2183   PetscCall(MatImaginaryPart(a->B));
2184   PetscFunctionReturn(PETSC_SUCCESS);
2185 }
2186 
2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2188 {
2189   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
2190   PetscInt           i, *idxb = NULL, m = A->rmap->n;
2191   PetscScalar       *va, *vv;
2192   Vec                vB, vA;
2193   const PetscScalar *vb;
2194 
2195   PetscFunctionBegin;
2196   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
2197   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
2198 
2199   PetscCall(VecGetArrayWrite(vA, &va));
2200   if (idx) {
2201     for (i = 0; i < m; i++) {
2202       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2203     }
2204   }
2205 
2206   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
2207   PetscCall(PetscMalloc1(m, &idxb));
2208   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
2209 
2210   PetscCall(VecGetArrayWrite(v, &vv));
2211   PetscCall(VecGetArrayRead(vB, &vb));
2212   for (i = 0; i < m; i++) {
2213     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2214       vv[i] = vb[i];
2215       if (idx) idx[i] = a->garray[idxb[i]];
2216     } else {
2217       vv[i] = va[i];
2218       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
2219     }
2220   }
2221   PetscCall(VecRestoreArrayWrite(vA, &vv));
2222   PetscCall(VecRestoreArrayWrite(vA, &va));
2223   PetscCall(VecRestoreArrayRead(vB, &vb));
2224   PetscCall(PetscFree(idxb));
2225   PetscCall(VecDestroy(&vA));
2226   PetscCall(VecDestroy(&vB));
2227   PetscFunctionReturn(PETSC_SUCCESS);
2228 }
2229 
/* Compute v[r] = min_c |A[r,c]| over each locally owned row, counting the implicit
   zeros of the compressed off-diagonal block; idx[r] (optional) receives the global
   column of a minimizing entry (ties broken toward the smaller global column). */
PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: every row's min |value| is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so the min |value| is at most the implicit 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): the hole search compares global columns against loop positions;
         it mirrors the sibling MatGetRowMin/Max implementations -- verify together if changed */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan this row's stored B entries for a smaller |value| than the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block indices are local; shift to global */
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2345 
/* Compute v[r] = min_c (real part of) A[r,c] over each locally owned row, counting the
   implicit zeros of the compressed off-diagonal block; idx[r] (optional) receives the
   global column of a minimizing entry (ties broken toward the smaller global column). */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: there is no entry to take the minimum over */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and the min is at most 0.0 */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): mirrors the hole search in MatGetRowMinAbs/Max -- keep the three in sync */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan this row's stored B entries for a smaller value than the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block indices are local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2461 
/* Compute v[r] = max_c (real part of) A[r,c] over each locally owned row, counting the
   implicit zeros of the compressed off-diagonal block; idx[r] (optional) receives the
   global column of a maximizing entry (ties broken toward the smaller global column). */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: there is no entry to take the maximum over */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense: no implicit zeros, seed with the first stored entry */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      /* NOTE(review): mirrors the hole search in MatGetRowMin/MinAbs -- keep the three in sync */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan this row's stored B entries for a larger value than the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal results; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block indices are local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2577 
2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
2579 {
2580   Mat *dummy;
2581 
2582   PetscFunctionBegin;
2583   PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
2584   *newmat = *dummy;
2585   PetscCall(PetscFree(dummy));
2586   PetscFunctionReturn(PETSC_SUCCESS);
2587 }
2588 
2589 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
2590 {
2591   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2592 
2593   PetscFunctionBegin;
2594   PetscCall(MatInvertBlockDiagonal(a->A, values));
2595   A->factorerrortype = a->A->factorerrortype;
2596   PetscFunctionReturn(PETSC_SUCCESS);
2597 }
2598 
2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
2600 {
2601   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;
2602 
2603   PetscFunctionBegin;
2604   PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2605   PetscCall(MatSetRandom(aij->A, rctx));
2606   if (x->assembled) {
2607     PetscCall(MatSetRandom(aij->B, rctx));
2608   } else {
2609     PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
2610   }
2611   PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
2612   PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
2613   PetscFunctionReturn(PETSC_SUCCESS);
2614 }
2615 
2616 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
2617 {
2618   PetscFunctionBegin;
2619   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2620   else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
2621   PetscFunctionReturn(PETSC_SUCCESS);
2622 }
2623 
2624 /*@
2625    MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank
2626 
2627    Not collective
2628 
2629    Input Parameter:
2630 .    A - the matrix
2631 
2632    Output Parameter:
2633 .    nz - the number of nonzeros
2634 
2635  Level: advanced
2636 
2637 .seealso: `MATMPIAIJ`, `Mat`
2638 @*/
2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
2640 {
2641   Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
2642   Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
2643 
2644   PetscFunctionBegin;
2645   *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
2646   PetscFunctionReturn(PETSC_SUCCESS);
2647 }
2648 
2649 /*@
2650    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2651 
2652    Collective
2653 
2654    Input Parameters:
2655 +    A - the matrix
2656 -    sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)
2657 
2658  Level: advanced
2659 
2660 @*/
2661 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
2662 {
2663   PetscFunctionBegin;
2664   PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
2665   PetscFunctionReturn(PETSC_SUCCESS);
2666 }
2667 
2668 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
2669 {
2670   PetscBool sc = PETSC_FALSE, flg;
2671 
2672   PetscFunctionBegin;
2673   PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
2674   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2675   PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
2676   if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
2677   PetscOptionsHeadEnd();
2678   PetscFunctionReturn(PETSC_SUCCESS);
2679 }
2680 
/* Y = Y + a*I.  Before shifting, make sure the local diagonal block has room for
   at least one entry per row so the basic shift cannot fail on insertion. */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the new-nonzero policy, which re-preallocation resets */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
2697 
2698 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
2699 {
2700   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2701 
2702   PetscFunctionBegin;
2703   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
2704   PetscCall(MatMissingDiagonal(a->A, missing, d));
2705   if (d) {
2706     PetscInt rstart;
2707     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
2708     *d += rstart;
2709   }
2710   PetscFunctionReturn(PETSC_SUCCESS);
2711 }
2712 
2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
2714 {
2715   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2716 
2717   PetscFunctionBegin;
2718   PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
2719   PetscFunctionReturn(PETSC_SUCCESS);
2720 }
2721 
2722 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
2723 {
2724   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2725 
2726   PetscFunctionBegin;
2727   PetscCall(MatEliminateZeros(a->A));
2728   PetscCall(MatEliminateZeros(a->B));
2729   PetscFunctionReturn(PETSC_SUCCESS);
2730 }
2731 
/* -------------------------------------------------------------------*/
/* Function-pointer dispatch table for MATMPIAIJ.  Entries are positional: each
   slot corresponds to an operation in struct _MatOps (petsc/private/matimpl.h),
   and the numeric comments label every fifth slot.  NULL means the operation is
   either unsupported or installed elsewhere (e.g. composed at creation time).
   Do NOT reorder entries: position, not name, determines the operation. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ};
2885 
2886 /* ----------------------------------------------------------------------------------------*/
2887 
2888 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
2889 {
2890   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2891 
2892   PetscFunctionBegin;
2893   PetscCall(MatStoreValues(aij->A));
2894   PetscCall(MatStoreValues(aij->B));
2895   PetscFunctionReturn(PETSC_SUCCESS);
2896 }
2897 
2898 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
2899 {
2900   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
2901 
2902   PetscFunctionBegin;
2903   PetscCall(MatRetrieveValues(aij->A));
2904   PetscCall(MatRetrieveValues(aij->B));
2905   PetscFunctionReturn(PETSC_SUCCESS);
2906 }
2907 
/* Type-specific implementation of MatMPIAIJSetPreallocation(): (re)creates the
   off-diagonal block B and preallocates both the diagonal (A) and off-diagonal
   (B) sequential blocks with the requested per-row nonzero counts. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* Existing column map and communication structures refer to the old off-diagonal block; discard them */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* On a single process there is no off-diagonal part, so give it zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  /* The diagonal block keeps its sizes, so it is created only on first preallocation */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2949 
/* MatResetPreallocation() backend for MATMPIAIJ: keeps the existing preallocation
   pattern of both local blocks but clears the assembled state so the matrix can
   be filled again from scratch. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* Drop the communication/mapping data of the off-diagonal block; it is
     reconstructed when the matrix is assembled again */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  /* Still preallocated, but must go through assembly again */
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2976 
/* MatDuplicate() implementation for MATMPIAIJ: creates a new matrix with the same
   layout/type as matin, copies the bookkeeping state, and duplicates the diagonal
   (A) and off-diagonal (B) blocks honoring cpvalues (copy values or structure only). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  /* Copy scalar state of the outer matrix; insertmode is deliberately reset */
  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* row-access scratch arrays are not carried over to the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  /* Duplicate the global-to-local column map of the off-diagonal block, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  /* Copy garray (global column indices of B); len+1 so a zero-length array still allocates */
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
3032 
3033 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3034 {
3035   PetscBool isbinary, ishdf5;
3036 
3037   PetscFunctionBegin;
3038   PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
3039   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
3040   /* force binary viewer to load .info file if it has not yet done so */
3041   PetscCall(PetscViewerSetUp(viewer));
3042   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
3043   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
3044   if (isbinary) {
3045     PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
3046   } else if (ishdf5) {
3047 #if defined(PETSC_HAVE_HDF5)
3048     PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
3049 #else
3050     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
3051 #endif
3052   } else {
3053     SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
3054   }
3055   PetscFunctionReturn(PETSC_SUCCESS);
3056 }
3057 
/* Read a MATMPIAIJ matrix from a PETSc binary viewer.

   On-disk layout: a 4-entry integer header (MAT_FILE_CLASSID, M, N, nz),
   followed by the per-row nonzero counts, then all column indices, then all
   values. Collective: every rank reads its own row slice via
   PetscViewerBinaryReadAll(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1]; /* global number of rows */
  N  = header[2]; /* global number of columns */
  nz = header[3]; /* total nonzeros; negative marks a special on-disk format */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths in place to obtain local CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: row lengths summed over all ranks must equal nz from the header */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3107 
3108 /* Not scalable because of ISAllGather() unless getting all columns. */
3109 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
3110 {
3111   IS          iscol_local;
3112   PetscBool   isstride;
3113   PetscMPIInt lisstride = 0, gisstride;
3114 
3115   PetscFunctionBegin;
3116   /* check if we are grabbing all columns*/
3117   PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));
3118 
3119   if (isstride) {
3120     PetscInt start, len, mstart, mlen;
3121     PetscCall(ISStrideGetInfo(iscol, &start, NULL));
3122     PetscCall(ISGetLocalSize(iscol, &len));
3123     PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
3124     if (mstart == start && mlen - mstart == len) lisstride = 1;
3125   }
3126 
3127   PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
3128   if (gisstride) {
3129     PetscInt N;
3130     PetscCall(MatGetSize(mat, NULL, &N));
3131     PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
3132     PetscCall(ISSetIdentity(iscol_local));
3133     PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
3134   } else {
3135     PetscInt cbs;
3136     PetscCall(ISGetBlockSize(iscol, &cbs));
3137     PetscCall(ISAllGather(iscol, &iscol_local));
3138     PetscCall(ISSetBlockSize(iscol_local, cbs));
3139   }
3140 
3141   *isseq = iscol_local;
3142   PetscFunctionReturn(PETSC_SUCCESS);
3143 }
3144 
3145 /*
3146  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3147  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3148 
3149  Input Parameters:
3150    mat - matrix
3151    isrow - parallel row index set; its local indices are a subset of local columns of mat,
3152            i.e., mat->rstart <= isrow[i] < mat->rend
3153    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3154            i.e., mat->cstart <= iscol[i] < mat->cend
3155  Output Parameter:
3156    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3157    iscol_o - sequential column index set for retrieving mat->B
3158    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3159  */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of local iscol sizes = global offset
     of this process's first selected column within the submatrix numbering */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  /* Mark each selected local column: x gets its global index (>= 0 means selected),
     cmap gets its column index in the submatrix */
  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local column indices selecting from the diagonal block mat->A */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: isrow shifted to local row numbering */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: an lvec entry > -1 means that column of B was selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 and must PetscFree() it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3256 
/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed onto it by the
       MAT_INITIAL_MATRIX pass below) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; takes ownership of Asub, destroys Bsub */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* Walk both sorted global-index arrays in lockstep, keeping only the iscol_o
         entries whose columns survived the compression (present in subgarray) */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      /* Replace iscol_o by the compressed version */
      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3349 
/* MatCreateSubMatrix() implementation for MATMPIAIJ: detects whether isrow/iscol
   share mat's processor distribution and dispatches to a specialized routine,
   falling back to the nonscalable general path (which gathers all of iscol). */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* On reuse, which IS was composed onto *newmat tells us which specialized
       routine created it the first time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      /* min/max inside the local ownership range implies all local indices are */
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* Logical AND across ranks: the fast paths require agreement on every process */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* keep iscol_local alive on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3449 
3450 /*@C
3451      MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
3452          and "off-diagonal" part of the matrix in CSR format.
3453 
3454    Collective
3455 
3456    Input Parameters:
3457 +  comm - MPI communicator
3458 .  A - "diagonal" portion of matrix
3459 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3460 -  garray - global index of B columns
3461 
   Output Parameter:
.  mat - the matrix, with input A as its local diagonal matrix

   Level: advanced
3465 
3466    Notes:
3467    See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3468 
3469    A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3470 
3471 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
3472 @*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b  = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local diagonal-block widths */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A passes to *mat */
  maij->A = A;

  /* Rewrite B's column indices in place from local (0..Bn-1) to global numbering via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer array ownership from B to Bnew: clear B's free flags so destroying
     B below does not free the shared arrays ... */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  /* ... and set Bnew's flags so the arrays are freed exactly once, with Bnew */
  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B by assembling; MAT_NO_OFF_PROC_ENTRIES avoids any communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
3543 
3544 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);
3545 
3546 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
3547 {
3548   PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
3549   PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
3550   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
3551   Mat             M, Msub, B = a->B;
3552   MatScalar      *aa;
3553   Mat_SeqAIJ     *aij;
3554   PetscInt       *garray = a->garray, *colsub, Ncols;
3555   PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
3556   IS              iscol_sub, iscmap;
3557   const PetscInt *is_idx, *cmap;
3558   PetscBool       allcolumns = PETSC_FALSE;
3559   MPI_Comm        comm;
3560 
3561   PetscFunctionBegin;
3562   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
3563   if (call == MAT_REUSE_MATRIX) {
3564     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
3565     PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
3566     PetscCall(ISGetLocalSize(iscol_sub, &count));
3567 
3568     PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
3569     PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");
3570 
3571     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
3572     PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
3573 
3574     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
3575 
3576   } else { /* call == MAT_INITIAL_MATRIX) */
3577     PetscBool flg;
3578 
3579     PetscCall(ISGetLocalSize(iscol, &n));
3580     PetscCall(ISGetSize(iscol, &Ncols));
3581 
3582     /* (1) iscol -> nonscalable iscol_local */
3583     /* Check for special case: each processor gets entire matrix columns */
3584     PetscCall(ISIdentity(iscol_local, &flg));
3585     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3586     PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
3587     if (allcolumns) {
3588       iscol_sub = iscol_local;
3589       PetscCall(PetscObjectReference((PetscObject)iscol_local));
3590       PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
3591 
3592     } else {
3593       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3594       PetscInt *idx, *cmap1, k;
3595       PetscCall(PetscMalloc1(Ncols, &idx));
3596       PetscCall(PetscMalloc1(Ncols, &cmap1));
3597       PetscCall(ISGetIndices(iscol_local, &is_idx));
3598       count = 0;
3599       k     = 0;
3600       for (i = 0; i < Ncols; i++) {
3601         j = is_idx[i];
3602         if (j >= cstart && j < cend) {
3603           /* diagonal part of mat */
3604           idx[count]     = j;
3605           cmap1[count++] = i; /* column index in submat */
3606         } else if (Bn) {
3607           /* off-diagonal part of mat */
3608           if (j == garray[k]) {
3609             idx[count]     = j;
3610             cmap1[count++] = i; /* column index in submat */
3611           } else if (j > garray[k]) {
3612             while (j > garray[k] && k < Bn - 1) k++;
3613             if (j == garray[k]) {
3614               idx[count]     = j;
3615               cmap1[count++] = i; /* column index in submat */
3616             }
3617           }
3618         }
3619       }
3620       PetscCall(ISRestoreIndices(iscol_local, &is_idx));
3621 
3622       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
3623       PetscCall(ISGetBlockSize(iscol, &cbs));
3624       PetscCall(ISSetBlockSize(iscol_sub, cbs));
3625 
3626       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
3627     }
3628 
3629     /* (3) Create sequential Msub */
3630     PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
3631   }
3632 
3633   PetscCall(ISGetLocalSize(iscol_sub, &count));
3634   aij = (Mat_SeqAIJ *)(Msub)->data;
3635   ii  = aij->i;
3636   PetscCall(ISGetIndices(iscmap, &cmap));
3637 
3638   /*
3639       m - number of local rows
3640       Ncols - number of columns (same on all processors)
3641       rstart - first row in new global matrix generated
3642   */
3643   PetscCall(MatGetSize(Msub, &m, NULL));
3644 
3645   if (call == MAT_INITIAL_MATRIX) {
3646     /* (4) Create parallel newmat */
3647     PetscMPIInt rank, size;
3648     PetscInt    csize;
3649 
3650     PetscCallMPI(MPI_Comm_size(comm, &size));
3651     PetscCallMPI(MPI_Comm_rank(comm, &rank));
3652 
3653     /*
3654         Determine the number of non-zeros in the diagonal and off-diagonal
3655         portions of the matrix in order to do correct preallocation
3656     */
3657 
3658     /* first get start and end of "diagonal" columns */
3659     PetscCall(ISGetLocalSize(iscol, &csize));
3660     if (csize == PETSC_DECIDE) {
3661       PetscCall(ISGetSize(isrow, &mglobal));
3662       if (mglobal == Ncols) { /* square matrix */
3663         nlocal = m;
3664       } else {
3665         nlocal = Ncols / size + ((Ncols % size) > rank);
3666       }
3667     } else {
3668       nlocal = csize;
3669     }
3670     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
3671     rstart = rend - nlocal;
3672     PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);
3673 
3674     /* next, compute all the lengths */
3675     jj = aij->j;
3676     PetscCall(PetscMalloc1(2 * m + 1, &dlens));
3677     olens = dlens + m;
3678     for (i = 0; i < m; i++) {
3679       jend = ii[i + 1] - ii[i];
3680       olen = 0;
3681       dlen = 0;
3682       for (j = 0; j < jend; j++) {
3683         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3684         else dlen++;
3685         jj++;
3686       }
3687       olens[i] = olen;
3688       dlens[i] = dlen;
3689     }
3690 
3691     PetscCall(ISGetBlockSize(isrow, &bs));
3692     PetscCall(ISGetBlockSize(iscol, &cbs));
3693 
3694     PetscCall(MatCreate(comm, &M));
3695     PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
3696     PetscCall(MatSetBlockSizes(M, bs, cbs));
3697     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
3698     PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
3699     PetscCall(PetscFree(dlens));
3700 
3701   } else { /* call == MAT_REUSE_MATRIX */
3702     M = *newmat;
3703     PetscCall(MatGetLocalSize(M, &i, NULL));
3704     PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
3705     PetscCall(MatZeroEntries(M));
3706     /*
3707          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3708        rather than the slower MatSetValues().
3709     */
3710     M->was_assembled = PETSC_TRUE;
3711     M->assembled     = PETSC_FALSE;
3712   }
3713 
3714   /* (5) Set values of Msub to *newmat */
3715   PetscCall(PetscMalloc1(count, &colsub));
3716   PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
3717 
3718   jj = aij->j;
3719   PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
3720   for (i = 0; i < m; i++) {
3721     row = rstart + i;
3722     nz  = ii[i + 1] - ii[i];
3723     for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
3724     PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
3725     jj += nz;
3726     aa += nz;
3727   }
3728   PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
3729   PetscCall(ISRestoreIndices(iscmap, &cmap));
3730 
3731   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
3732   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
3733 
3734   PetscCall(PetscFree(colsub));
3735 
3736   /* save Msub, iscol_sub and iscmap used in processor for next request */
3737   if (call == MAT_INITIAL_MATRIX) {
3738     *newmat = M;
3739     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
3740     PetscCall(MatDestroy(&Msub));
3741 
3742     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
3743     PetscCall(ISDestroy(&iscol_sub));
3744 
3745     PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
3746     PetscCall(ISDestroy(&iscmap));
3747 
3748     if (iscol_local) {
3749       PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
3750       PetscCall(ISDestroy(&iscol_local));
3751     }
3752   }
3753   PetscFunctionReturn(PETSC_SUCCESS);
3754 }
3755 
/*
    Not great since it makes two copies of the submatrix: first a SeqAIJ
  on each process, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the special case is only usable if it holds on every rank, hence the logical-AND reduction */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* recover the sequential submatrix stashed on *newmat by the MAT_INITIAL_MATRIX call below */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    /* build the local (sequential) copy of the requested rows/columns */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible: low ranks get the remainder */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* inclusive prefix sum of the local column counts gives this rank's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    /* single allocation: dlens is the first m entries, olens aliases the second m */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* a column inside [rstart,rend) lands in the diagonal block, otherwise off-diagonal */
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  /* insert the rows of Mreuse into M row by row; jj/aa are advanced in lock-step through the CSR arrays */
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    /* compose keeps a reference, so the local destroy leaves Mreuse alive on *newmat */
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
3889 
3890 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
3891 {
3892   PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
3893   PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
3894   const PetscInt *JJ;
3895   PetscBool       nooffprocentries;
3896   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;
3897 
3898   PetscFunctionBegin;
3899   PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);
3900 
3901   PetscCall(PetscLayoutSetUp(B->rmap));
3902   PetscCall(PetscLayoutSetUp(B->cmap));
3903   m      = B->rmap->n;
3904   cstart = B->cmap->rstart;
3905   cend   = B->cmap->rend;
3906   rstart = B->rmap->rstart;
3907 
3908   PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));
3909 
3910   if (PetscDefined(USE_DEBUG)) {
3911     for (i = 0; i < m; i++) {
3912       nnz = Ii[i + 1] - Ii[i];
3913       JJ  = J + Ii[i];
3914       PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
3915       PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
3916       PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
3917     }
3918   }
3919 
3920   for (i = 0; i < m; i++) {
3921     nnz     = Ii[i + 1] - Ii[i];
3922     JJ      = J + Ii[i];
3923     nnz_max = PetscMax(nnz_max, nnz);
3924     d       = 0;
3925     for (j = 0; j < nnz; j++) {
3926       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3927     }
3928     d_nnz[i] = d;
3929     o_nnz[i] = nnz - d;
3930   }
3931   PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
3932   PetscCall(PetscFree2(d_nnz, o_nnz));
3933 
3934   for (i = 0; i < m; i++) {
3935     ii = i + rstart;
3936     PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
3937   }
3938   nooffprocentries    = B->nooffprocentries;
3939   B->nooffprocentries = PETSC_TRUE;
3940   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
3941   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
3942   B->nooffprocentries = nooffprocentries;
3943 
3944   /* count number of entries below block diagonal */
3945   PetscCall(PetscFree(Aij->ld));
3946   PetscCall(PetscCalloc1(m, &ld));
3947   Aij->ld = ld;
3948   for (i = 0; i < m; i++) {
3949     nnz = Ii[i + 1] - Ii[i];
3950     j   = 0;
3951     while (j < nnz && J[j] < cstart) j++;
3952     ld[i] = j;
3953     J += nnz;
3954   }
3955 
3956   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3957   PetscFunctionReturn(PETSC_SUCCESS);
3958 }
3959 
3960 /*@
3961    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
3962    (the default parallel PETSc format).
3963 
3964    Collective
3965 
3966    Input Parameters:
3967 +  B - the matrix
3968 .  i - the indices into j for the start of each local row (starts with zero)
3969 .  j - the column indices for each local row (starts with zero)
3970 -  v - optional values in the matrix
3971 
3972    Level: developer
3973 
3974    Notes:
3975        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3976      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3977      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3978 
3979        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3980 
3981        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
3983     as shown
3984 
3985 $        1 0 0
3986 $        2 0 3     P0
3987 $       -------
3988 $        4 5 6     P1
3989 $
3990 $     Process0 [P0]: rows_owned=[0,1]
3991 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3992 $        j =  {0,0,2}  [size = 3]
3993 $        v =  {1,2,3}  [size = 3]
3994 $
3995 $     Process1 [P1]: rows_owned=[2]
3996 $        i =  {0,3}    [size = nrow+1  = 1+1]
3997 $        j =  {0,1,2}  [size = 3]
3998 $        v =  {4,5,6}  [size = 3]
3999 
4000 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
4001           `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
4002 @*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the implementation composed on this matrix type (e.g. MatMPIAIJSetPreallocationCSR_MPIAIJ);
     PetscTryMethod() silently does nothing if the type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4009 
4010 /*@C
4011    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
4012    (the default parallel PETSc format).  For good matrix assembly performance
4013    the user should preallocate the matrix storage by setting the parameters
4014    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4015    performance can be increased by more than a factor of 50.
4016 
4017    Collective
4018 
4019    Input Parameters:
4020 +  B - the matrix
4021 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4022            (same value is used for all local rows)
4023 .  d_nnz - array containing the number of nonzeros in the various rows of the
4024            DIAGONAL portion of the local submatrix (possibly different for each row)
4025            or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
4026            The size of this array is equal to the number of local rows, i.e 'm'.
4027            For matrices that will be factored, you must leave room for (and set)
4028            the diagonal entry even if it is zero.
4029 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4030            submatrix (same value is used for all local rows).
4031 -  o_nnz - array containing the number of nonzeros in the various rows of the
4032            OFF-DIAGONAL portion of the local submatrix (possibly different for
4033            each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
4034            structure. The size of this array is equal to the number
4035            of local rows, i.e 'm'.
4036 
4037    If the *_nnz parameter is given then the *_nz parameter is ignored
4038 
   The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
4040    storage.  The stored row and column indices begin with zero.
4041    See [Sparse Matrices](sec_matsparse) for details.
4042 
4043    The parallel matrix is partitioned such that the first m0 rows belong to
4044    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4045    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4046 
4047    The DIAGONAL portion of the local submatrix of a processor can be defined
4048    as the submatrix which is obtained by extraction the part corresponding to
4049    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4050    first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
4052    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4053    common case of a square matrix, the row and column ranges are the same and
4054    the DIAGONAL part is also square. The remaining portion of the local
4055    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4056 
4057    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4058 
4059    You can call MatGetInfo() to get information on how effective the preallocation was;
4060    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4061    You can also run with the option -info and look for messages with the string
4062    malloc in them to see if additional memory allocation was needed.
4063 
4064    Example usage:
4065 
4066    Consider the following 8x8 matrix with 34 non-zero values, that is
4067    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4068    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4069    as follows:
4070 
4071 .vb
4072             1  2  0  |  0  3  0  |  0  4
4073     Proc0   0  5  6  |  7  0  0  |  8  0
4074             9  0 10  | 11  0  0  | 12  0
4075     -------------------------------------
4076            13  0 14  | 15 16 17  |  0  0
4077     Proc1   0 18  0  | 19 20 21  |  0  0
4078             0  0  0  | 22 23  0  | 24  0
4079     -------------------------------------
4080     Proc2  25 26 27  |  0  0 28  | 29  0
4081            30  0  0  | 31 32 33  |  0 34
4082 .ve
4083 
4084    This can be represented as a collection of submatrices as:
4085 
4086 .vb
4087       A B C
4088       D E F
4089       G H I
4090 .ve
4091 
4092    Where the submatrices A,B,C are owned by proc0, D,E,F are
4093    owned by proc1, G,H,I are owned by proc2.
4094 
4095    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4096    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4097    The 'M','N' parameters are 8,8, and have the same values on all procs.
4098 
4099    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4100    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4101    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4102    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices, e.g., proc1 will store [E] as a `MATSEQAIJ`
   matrix, and [DF] as another `MATSEQAIJ` matrix.
4105 
4106    When d_nz, o_nz parameters are specified, d_nz storage elements are
4107    allocated for every row of the local diagonal submatrix, and o_nz
4108    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4110    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4111    In this case, the values of d_nz,o_nz are:
4112 .vb
4113      proc0 : dnz = 2, o_nz = 2
4114      proc1 : dnz = 3, o_nz = 2
4115      proc2 : dnz = 1, o_nz = 4
4116 .ve
4117    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4118    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
4120    34 values.
4121 
4122    When d_nnz, o_nnz parameters are specified, the storage is specified
4123    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4124    In the above case the values for d_nnz,o_nnz are:
4125 .vb
4126      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4127      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4128      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4129 .ve
4130    Here the space allocated is sum of all the above values i.e 34, and
4131    hence pre-allocation is perfect.
4132 
4133    Level: intermediate
4134 
4135 .seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
4136           `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
4137 @*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* validate the header before dispatch; the composed method does the real work */
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* PetscTryMethod() silently does nothing if the matrix type did not compose this method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4146 
4147 /*@
4148      MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
4149          CSR format for the local rows.
4150 
4151    Collective
4152 
4153    Input Parameters:
4154 +  comm - MPI communicator
4155 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4156 .  n - This value should be the same as the local size used in creating the
4157        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4158        calculated if N is given) For square matrices n is almost always m.
4159 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4160 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4161 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4162 .   j - column indices
4163 -   a - optional matrix values
4164 
4165    Output Parameter:
4166 .   mat - the matrix
4167 
4168    Level: intermediate
4169 
4170    Notes:
4171        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4172      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4173      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4174 
4175        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4176 
4177        The format which is used for the sparse matrix input, is equivalent to a
    row-major ordering, i.e., for the following matrix, the input data expected is
4179     as shown
4180 
4181        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays
4182 
4183 $        1 0 0
4184 $        2 0 3     P0
4185 $       -------
4186 $        4 5 6     P1
4187 $
4188 $     Process0 [P0]: rows_owned=[0,1]
4189 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4190 $        j =  {0,0,2}  [size = 3]
4191 $        v =  {1,2,3}  [size = 3]
4192 $
4193 $     Process1 [P1]: rows_owned=[2]
4194 $        i =  {0,3}    [size = nrow+1  = 1+1]
4195 $        j =  {0,1,2}  [size = 3]
4196 $        v =  {4,5,6}  [size = 3]
4197 
.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4199           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4200 @*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL; when supplied it must be 0-based (i[0] == 0) */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* preallocation, value insertion, and assembly all happen inside this call */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4213 
4214 /*@
4215      MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
4216          CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`
4217 
4218      Deprecated: Use `MatUpdateMPIAIJWithArray()`
4219 
4220    Collective
4221 
4222    Input Parameters:
4223 +  mat - the matrix
4224 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
4225 .  n - This value should be the same as the local size used in creating the
4226        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
4227        calculated if N is given) For square matrices n is almost always m.
4228 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4229 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4230 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4231 .  J - column indices
4232 -  v - matrix values
4233 
4234    Level: intermediate
4235 
4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4237           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
4238 @*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;   /* row pointers of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* per-row count of off-diagonal entries left of the diagonal block */

  PetscFunctionBegin;
  /* J, M, and N are accepted for interface compatibility but are not consulted: the sparsity
     pattern (and hence J) must be identical to the one used at creation time */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  /* each CSR row of v is laid out as [off-diag left of diag block | diag block | off-diag right];
     split it into the diagonal (ad) and off-diagonal (ao) value arrays accordingly */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were only written locally, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  /* bump object states so cached data (norms, GPU copies, ...) is invalidated */
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}
4282 
4283 /*@
4284      MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values
4285 
4286    Collective
4287 
4288    Input Parameters:
4289 +  mat - the matrix
4290 -  v - matrix values, stored by row
4291 
4292    Level: intermediate
4293 
4294    Note:
4295    The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`
4296 
4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
4299 @*/
4300 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
4301 {
4302   PetscInt        nnz, i, m;
4303   PetscBool       nooffprocentries;
4304   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
4305   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
4306   Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
4307   PetscScalar    *ad, *ao;
4308   const PetscInt *Adi = Ad->i, *Adj = Ao->i;
4309   PetscInt        ldi, Iii, md;
4310   PetscInt       *ld = Aij->ld;
4311 
4312   PetscFunctionBegin;
4313   m = mat->rmap->n;
4314 
4315   PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
4316   PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
4317   Iii = 0;
4318   for (i = 0; i < m; i++) {
4319     nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
4320     ldi = ld[i];
4321     md  = Adi[i + 1] - Adi[i];
4322     PetscCall(PetscArraycpy(ao, v + Iii, ldi));
4323     PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
4324     PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
4325     ad += md;
4326     ao += nnz - md;
4327     Iii += nnz;
4328   }
4329   nooffprocentries      = mat->nooffprocentries;
4330   mat->nooffprocentries = PETSC_TRUE;
4331   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
4332   PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
4333   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
4334   PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
4335   PetscCall(PetscObjectStateIncrease((PetscObject)mat));
4336   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
4337   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
4338   mat->nooffprocentries = nooffprocentries;
4339   PetscFunctionReturn(PETSC_SUCCESS);
4340 }
4341 
4342 /*@C
4343    MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
4344    (the default parallel PETSc format).  For good matrix assembly performance
4345    the user should preallocate the matrix storage by setting the parameters
4346    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4347    performance can be increased by more than a factor of 50.
4348 
4349    Collective
4350 
4351    Input Parameters:
4352 +  comm - MPI communicator
4353 .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
4354            This value should be the same as the local size used in creating the
4355            y vector for the matrix-vector product y = Ax.
4356 .  n - This value should be the same as the local size used in creating the
4357        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4358        calculated if N is given) For square matrices n is almost always m.
4359 .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
4360 .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
4361 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4362            (same value is used for all local rows)
4363 .  d_nnz - array containing the number of nonzeros in the various rows of the
4364            DIAGONAL portion of the local submatrix (possibly different for each row)
4365            or NULL, if d_nz is used to specify the nonzero structure.
4366            The size of this array is equal to the number of local rows, i.e 'm'.
4367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4368            submatrix (same value is used for all local rows).
4369 -  o_nnz - array containing the number of nonzeros in the various rows of the
4370            OFF-DIAGONAL portion of the local submatrix (possibly different for
4371            each row) or NULL, if o_nz is used to specify the nonzero
4372            structure. The size of this array is equal to the number
4373            of local rows, i.e 'm'.
4374 
4375    Output Parameter:
4376 .  A - the matrix
4377 
4378    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4379    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4380    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4381 
4382    Notes:
4383    If the *_nnz parameter is given then the *_nz parameter is ignored
4384 
4385    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4386    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4387    storage requirements for this matrix.
4388 
4389    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
4391    that argument.
4392 
4393    The user MUST specify either the local or global matrix dimensions
4394    (possibly both).
4395 
4396    The parallel matrix is partitioned across processors such that the
4397    first m0 rows belong to process 0, the next m1 rows belong to
4398    process 1, the next m2 rows belong to process 2 etc.. where
4399    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4400    values corresponding to [m x N] submatrix.
4401 
4402    The columns are logically partitioned with the n0 columns belonging
4403    to 0th partition, the next n1 columns belonging to the next
4404    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4405 
4406    The DIAGONAL portion of the local submatrix on any given processor
4407    is the submatrix corresponding to the rows and columns m,n
4408    corresponding to the given processor. i.e diagonal matrix on
4409    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4410    etc. The remaining portion of the local submatrix [m x (N-n)]
4411    constitute the OFF-DIAGONAL portion. The example below better
4412    illustrates this concept.
4413 
4414    For a square global matrix we define each processor's diagonal portion
4415    to be its local rows and the corresponding columns (a square submatrix);
4416    each processor's off-diagonal portion encompasses the remainder of the
4417    local matrix (a rectangular submatrix).
4418 
4419    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4420 
4421    When calling this routine with a single process communicator, a matrix of
4422    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4423    type of communicator, use the construction mechanism
4424 .vb
4425      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4426 .ve
4427 
4428 $     MatCreate(...,&A);
4429 $     MatSetType(A,MATMPIAIJ);
4430 $     MatSetSizes(A, m,n,M,N);
4431 $     MatMPIAIJSetPreallocation(A,...);
4432 
4433    By default, this format uses inodes (identical nodes) when possible.
4434    We search for consecutive rows with the same nonzero structure, thereby
4435    reusing matrix information to achieve increased efficiency.
4436 
4437    Options Database Keys:
4438 +  -mat_no_inode  - Do not use inodes
4439 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4440 -  -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4441         See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix.
4442         Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.
4443 
4444    Example usage:
4445 
4446    Consider the following 8x8 matrix with 34 non-zero values, that is
4447    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4448    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4449    as follows
4450 
4451 .vb
4452             1  2  0  |  0  3  0  |  0  4
4453     Proc0   0  5  6  |  7  0  0  |  8  0
4454             9  0 10  | 11  0  0  | 12  0
4455     -------------------------------------
4456            13  0 14  | 15 16 17  |  0  0
4457     Proc1   0 18  0  | 19 20 21  |  0  0
4458             0  0  0  | 22 23  0  | 24  0
4459     -------------------------------------
4460     Proc2  25 26 27  |  0  0 28  | 29  0
4461            30  0  0  | 31 32 33  |  0 34
4462 .ve
4463 
4464    This can be represented as a collection of submatrices as
4465 
4466 .vb
4467       A B C
4468       D E F
4469       G H I
4470 .ve
4471 
4472    Where the submatrices A,B,C are owned by proc0, D,E,F are
4473    owned by proc1, G,H,I are owned by proc2.
4474 
4475    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4476    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4477    The 'M','N' parameters are 8,8, and have the same values on all procs.
4478 
4479    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4480    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4481    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4482    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4483    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
4485 
4486    When d_nz, o_nz parameters are specified, d_nz storage elements are
4487    allocated for every row of the local diagonal submatrix, and o_nz
4488    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
4490    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4491    In this case, the values of d_nz,o_nz are
4492 .vb
4493      proc0 : dnz = 2, o_nz = 2
4494      proc1 : dnz = 3, o_nz = 2
4495      proc2 : dnz = 1, o_nz = 4
4496 .ve
4497    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4498    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
4500    34 values.
4501 
4502    When d_nnz, o_nnz parameters are specified, the storage is specified
4503    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4504    In the above case the values for d_nnz,o_nnz are
4505 .vb
4506      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4507      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4508      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4509 .ve
4510    Here the space allocated is sum of all the above values i.e 34, and
4511    hence pre-allocation is perfect.
4512 
4513    Level: intermediate
4514 
4515 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
4516           `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
4517 @*/
4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
4519 {
4520   PetscMPIInt size;
4521 
4522   PetscFunctionBegin;
4523   PetscCall(MatCreate(comm, A));
4524   PetscCall(MatSetSizes(*A, m, n, M, N));
4525   PetscCallMPI(MPI_Comm_size(comm, &size));
4526   if (size > 1) {
4527     PetscCall(MatSetType(*A, MATMPIAIJ));
4528     PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
4529   } else {
4530     PetscCall(MatSetType(*A, MATSEQAIJ));
4531     PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
4532   }
4533   PetscFunctionReturn(PETSC_SUCCESS);
4534 }
4535 
4536 /*MC
4537     MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix
4538 
4539     Synopsis:
4540     MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4541 
4542     Not Collective
4543 
4544     Input Parameter:
4545 .   A - the `MATMPIAIJ` matrix
4546 
4547     Output Parameters:
4548 +   Ad - the diagonal portion of the matrix
4549 .   Ao - the off diagonal portion of the matrix
4550 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4551 -   ierr - error code
4552 
4553      Level: advanced
4554 
4555     Note:
4556     Use  `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4557 
4558 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
4559 M*/
4560 
4561 /*MC
4562     MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`
4563 
4564     Synopsis:
4565     MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)
4566 
4567     Not Collective
4568 
4569     Input Parameters:
4570 +   A - the `MATMPIAIJ` matrix
4571 .   Ad - the diagonal portion of the matrix
4572 .   Ao - the off diagonal portion of the matrix
4573 .   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4574 -   ierr - error code
4575 
4576      Level: advanced
4577 
4578 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
4579 M*/
4580 
4581 /*@C
4582   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix
4583 
4584   Not collective
4585 
4586   Input Parameter:
4587 . A - The `MATMPIAIJ` matrix
4588 
4589   Output Parameters:
4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix
4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4593 
4594   Level: intermediate
4595 
4596   Note:
4597   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4599   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4600   local column numbers to global column numbers in the original matrix.
4601 
4602   Fortran Note:
4603   `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`
4604 
4605 .seealso: `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
4606 @*/
4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
4608 {
4609   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
4610   PetscBool   flg;
4611 
4612   PetscFunctionBegin;
4613   PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
4614   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
4615   if (Ad) *Ad = a->A;
4616   if (Ao) *Ao = a->B;
4617   if (colmap) *colmap = a->garray;
4618   PetscFunctionReturn(PETSC_SUCCESS);
4619 }
4620 
/* Concatenate the rows of the per-rank sequential matrices inmat into one parallel
   matrix *outmat on comm.  n is the local column count (or PETSC_DECIDE).  With
   scall == MAT_INITIAL_MATRIX the layout and preallocation of *outmat are computed;
   with MAT_REUSE_MATRIX only the numerical values are refreshed. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* first global row owned here = total rows on the preceding ranks */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    /* count diagonal/off-diagonal nonzeros per row for preallocation */
    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only one of these takes effect, depending on whether *outmat came out seq or mpi */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into the corresponding owned row of *outmat */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4672 
4673 PetscErrorCode MatFileSplit(Mat A, char *outfile)
4674 {
4675   PetscMPIInt        rank;
4676   PetscInt           m, N, i, rstart, nnz;
4677   size_t             len;
4678   const PetscInt    *indx;
4679   PetscViewer        out;
4680   char              *name;
4681   Mat                B;
4682   const PetscScalar *values;
4683 
4684   PetscFunctionBegin;
4685   PetscCall(MatGetLocalSize(A, &m, NULL));
4686   PetscCall(MatGetSize(A, NULL, &N));
4687   /* Should this be the type of the diagonal block of A? */
4688   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
4689   PetscCall(MatSetSizes(B, m, N, m, N));
4690   PetscCall(MatSetBlockSizesFromMats(B, A, A));
4691   PetscCall(MatSetType(B, MATSEQAIJ));
4692   PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
4693   PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
4694   for (i = 0; i < m; i++) {
4695     PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
4696     PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
4697     PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
4698   }
4699   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
4700   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
4701 
4702   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
4703   PetscCall(PetscStrlen(outfile, &len));
4704   PetscCall(PetscMalloc1(len + 6, &name));
4705   PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
4706   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
4707   PetscCall(PetscFree(name));
4708   PetscCall(MatView(B, out));
4709   PetscCall(PetscViewerDestroy(&out));
4710   PetscCall(MatDestroy(&B));
4711   PetscFunctionReturn(PETSC_SUCCESS);
4712 }
4713 
/* Destructor for the Mat_Merge_SeqsToMPI context that MatCreateMPIAIJSumSeqAIJSymbolic()
   attaches to the result matrix via a PetscContainer; releases all merge workspace.
   A NULL context is accepted and ignored. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are pointer arrays whose per-message storage lives in one
     contiguous chunk anchored at element 0 (allocated by PetscPostIrecvInt/Scalar),
     so free that chunk first, then the pointer array itself */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4736 
4737 #include <../src/mat/utils/freespace.h>
4738 #include <petscbt.h>
4739 
/* Numeric phase of merging per-rank sequential AIJ matrices into the parallel matrix
   mpimat whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic().  Each rank
   sends the values of the rows it holds for other ranks, receives the values destined
   for its own rows, and accumulates local plus received contributions row by row. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context stored on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;     /* row pointers of the merged local rows */
  bj     = merge->bj;     /* column indices of the merged local rows */
  buf_ri = merge->buf_ri; /* received i-structures from the symbolic phase */
  buf_rj = merge->buf_rj; /* received j-structures from the symbolic phase */

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* values of the rows owned by [proc] are contiguous in aa, starting at ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i)); /* work array: accumulated values of one merged row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row number */
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* both aj and bj_i are sorted, so walk bj_i and advance nextaj on each column match */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* receive buffers are one contiguous chunk anchored at abuf_r[0] */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
4860 
4861 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
4862 {
4863   Mat                  B_mpi;
4864   Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
4865   PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
4866   PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
4867   PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
4868   PetscInt             len, proc, *dnz, *onz, bs, cbs;
4869   PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
4870   PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
4871   MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
4872   MPI_Status          *status;
4873   PetscFreeSpaceList   free_space = NULL, current_space = NULL;
4874   PetscBT              lnkbt;
4875   Mat_Merge_SeqsToMPI *merge;
4876   PetscContainer       container;
4877 
4878   PetscFunctionBegin;
4879   PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));
4880 
4881   /* make sure it is a PETSc comm */
4882   PetscCall(PetscCommDuplicate(comm, &comm, NULL));
4883   PetscCallMPI(MPI_Comm_size(comm, &size));
4884   PetscCallMPI(MPI_Comm_rank(comm, &rank));
4885 
4886   PetscCall(PetscNew(&merge));
4887   PetscCall(PetscMalloc1(size, &status));
4888 
4889   /* determine row ownership */
4890   /*---------------------------------------------------------*/
4891   PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
4892   PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
4893   PetscCall(PetscLayoutSetSize(merge->rowmap, M));
4894   PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
4895   PetscCall(PetscLayoutSetUp(merge->rowmap));
4896   PetscCall(PetscMalloc1(size, &len_si));
4897   PetscCall(PetscMalloc1(size, &merge->len_s));
4898 
4899   m      = merge->rowmap->n;
4900   owners = merge->rowmap->range;
4901 
4902   /* determine the number of messages to send, their lengths */
4903   /*---------------------------------------------------------*/
4904   len_s = merge->len_s;
4905 
4906   len          = 0; /* length of buf_si[] */
4907   merge->nsend = 0;
4908   for (proc = 0; proc < size; proc++) {
4909     len_si[proc] = 0;
4910     if (proc == rank) {
4911       len_s[proc] = 0;
4912     } else {
4913       len_si[proc] = owners[proc + 1] - owners[proc] + 1;
4914       len_s[proc]  = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4915     }
4916     if (len_s[proc]) {
4917       merge->nsend++;
4918       nrows = 0;
4919       for (i = owners[proc]; i < owners[proc + 1]; i++) {
4920         if (ai[i + 1] > ai[i]) nrows++;
4921       }
4922       len_si[proc] = 2 * (nrows + 1);
4923       len += len_si[proc];
4924     }
4925   }
4926 
4927   /* determine the number and length of messages to receive for ij-structure */
4928   /*-------------------------------------------------------------------------*/
4929   PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
4930   PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));
4931 
4932   /* post the Irecv of j-structure */
4933   /*-------------------------------*/
4934   PetscCall(PetscCommGetNewTag(comm, &tagj));
4935   PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));
4936 
4937   /* post the Isend of j-structure */
4938   /*--------------------------------*/
4939   PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));
4940 
4941   for (proc = 0, k = 0; proc < size; proc++) {
4942     if (!len_s[proc]) continue;
4943     i = owners[proc];
4944     PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
4945     k++;
4946   }
4947 
4948   /* receives and sends of j-structure are complete */
4949   /*------------------------------------------------*/
4950   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
4951   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));
4952 
4953   /* send and recv i-structure */
4954   /*---------------------------*/
4955   PetscCall(PetscCommGetNewTag(comm, &tagi));
4956   PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));
4957 
4958   PetscCall(PetscMalloc1(len + 1, &buf_s));
4959   buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
4960   for (proc = 0, k = 0; proc < size; proc++) {
4961     if (!len_s[proc]) continue;
4962     /* form outgoing message for i-structure:
4963          buf_si[0]:                 nrows to be sent
4964                [1:nrows]:           row index (global)
4965                [nrows+1:2*nrows+1]: i-structure index
4966     */
4967     /*-------------------------------------------*/
4968     nrows       = len_si[proc] / 2 - 1;
4969     buf_si_i    = buf_si + nrows + 1;
4970     buf_si[0]   = nrows;
4971     buf_si_i[0] = 0;
4972     nrows       = 0;
4973     for (i = owners[proc]; i < owners[proc + 1]; i++) {
4974       anzi = ai[i + 1] - ai[i];
4975       if (anzi) {
4976         buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
4977         buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
4978         nrows++;
4979       }
4980     }
4981     PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
4982     k++;
4983     buf_si += len_si[proc];
4984   }
4985 
4986   if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
4987   if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));
4988 
4989   PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
4990   for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));
4991 
4992   PetscCall(PetscFree(len_si));
4993   PetscCall(PetscFree(len_ri));
4994   PetscCall(PetscFree(rj_waits));
4995   PetscCall(PetscFree2(si_waits, sj_waits));
4996   PetscCall(PetscFree(ri_waits));
4997   PetscCall(PetscFree(buf_s));
4998   PetscCall(PetscFree(status));
4999 
5000   /* compute a local seq matrix in each processor */
5001   /*----------------------------------------------*/
5002   /* allocate bi array and free space for accumulating nonzero column info */
5003   PetscCall(PetscMalloc1(m + 1, &bi));
5004   bi[0] = 0;
5005 
5006   /* create and initialize a linked list */
5007   nlnk = N + 1;
5008   PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));
5009 
5010   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5011   len = ai[owners[rank + 1]] - ai[owners[rank]];
5012   PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));
5013 
5014   current_space = free_space;
5015 
5016   /* determine symbolic info for each local row */
5017   PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));
5018 
5019   for (k = 0; k < merge->nrecv; k++) {
5020     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5021     nrows       = *buf_ri_k[k];
5022     nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
5023     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
5024   }
5025 
5026   MatPreallocateBegin(comm, m, n, dnz, onz);
5027   len = 0;
5028   for (i = 0; i < m; i++) {
5029     bnzi = 0;
5030     /* add local non-zero cols of this proc's seqmat into lnk */
5031     arow = owners[rank] + i;
5032     anzi = ai[arow + 1] - ai[arow];
5033     aj   = a->j + ai[arow];
5034     PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5035     bnzi += nlnk;
5036     /* add received col data into lnk */
5037     for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
5038       if (i == *nextrow[k]) {            /* i-th row */
5039         anzi = *(nextai[k] + 1) - *nextai[k];
5040         aj   = buf_rj[k] + *nextai[k];
5041         PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
5042         bnzi += nlnk;
5043         nextrow[k]++;
5044         nextai[k]++;
5045       }
5046     }
5047     if (len < bnzi) len = bnzi; /* =max(bnzi) */
5048 
5049     /* if free space is not available, make more free space */
5050     if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
5051     /* copy data into free space, then initialize lnk */
5052     PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
5053     PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));
5054 
5055     current_space->array += bnzi;
5056     current_space->local_used += bnzi;
5057     current_space->local_remaining -= bnzi;
5058 
5059     bi[i + 1] = bi[i] + bnzi;
5060   }
5061 
5062   PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
5063 
5064   PetscCall(PetscMalloc1(bi[m] + 1, &bj));
5065   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
5066   PetscCall(PetscLLDestroy(lnk, lnkbt));
5067 
5068   /* create symbolic parallel matrix B_mpi */
5069   /*---------------------------------------*/
5070   PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
5071   PetscCall(MatCreate(comm, &B_mpi));
5072   if (n == PETSC_DECIDE) {
5073     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
5074   } else {
5075     PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
5076   }
5077   PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
5078   PetscCall(MatSetType(B_mpi, MATMPIAIJ));
5079   PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
5080   MatPreallocateEnd(dnz, onz);
5081   PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
5082 
5083   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5084   B_mpi->assembled = PETSC_FALSE;
5085   merge->bi        = bi;
5086   merge->bj        = bj;
5087   merge->buf_ri    = buf_ri;
5088   merge->buf_rj    = buf_rj;
5089   merge->coi       = NULL;
5090   merge->coj       = NULL;
5091   merge->owners_co = NULL;
5092 
5093   PetscCall(PetscCommDestroy(&comm));
5094 
5095   /* attach the supporting struct to B_mpi for reuse */
5096   PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
5097   PetscCall(PetscContainerSetPointer(container, merge));
5098   PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
5099   PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
5100   PetscCall(PetscContainerDestroy(&container));
5101   *mpimat = B_mpi;
5102 
5103   PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
5104   PetscFunctionReturn(PETSC_SUCCESS);
5105 }
5106 
5107 /*@C
5108       MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
5109                  matrices from each processor
5110 
5111     Collective
5112 
5113    Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix (one per process)
5116 .    m - number of local rows (or `PETSC_DECIDE`)
5117 .    n - number of local columns (or `PETSC_DECIDE`)
5118 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5119 
5120    Output Parameter:
5121 .    mpimat - the parallel matrix generated
5122 
5123     Level: advanced
5124 
5125    Note:
5126      The dimensions of the sequential matrix in each processor MUST be the same.
5127      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5128      destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
5129 @*/
5130 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
5131 {
5132   PetscMPIInt size;
5133 
5134   PetscFunctionBegin;
5135   PetscCallMPI(MPI_Comm_size(comm, &size));
5136   if (size == 1) {
5137     PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5138     if (scall == MAT_INITIAL_MATRIX) {
5139       PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
5140     } else {
5141       PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
5142     }
5143     PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5144     PetscFunctionReturn(PETSC_SUCCESS);
5145   }
5146   PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
5147   if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
5148   PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
5149   PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
5150   PetscFunctionReturn(PETSC_SUCCESS);
5151 }
5152 
5153 /*@
5154      MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5155           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5156           with `MatGetSize()`
5157 
5158     Not Collective
5159 
   Input Parameter:
.    A - the matrix
5163 
5164    Output Parameter:
5165 .    A_loc - the local sequential matrix generated
5166 
5167     Level: developer
5168 
5169    Notes:
5170      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5171 
5172      Destroy the matrix with `MatDestroy()`
5173 
5174 .seealso: `MatMPIAIJGetLocalMat()`
5175 @*/
5176 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5177 {
5178   PetscBool mpi;
5179 
5180   PetscFunctionBegin;
5181   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5182   if (mpi) {
5183     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5184   } else {
5185     *A_loc = A;
5186     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5187   }
5188   PetscFunctionReturn(PETSC_SUCCESS);
5189 }
5190 
5191 /*@
5192      MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5193           mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained
5194           with `MatGetSize()`
5195 
5196     Not Collective
5197 
5198    Input Parameters:
5199 +    A - the matrix
5200 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5201 
5202    Output Parameter:
5203 .    A_loc - the local sequential matrix generated
5204 
5205     Level: developer
5206 
5207    Notes:
5208      In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.
5209 
5210      When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A.
5211      If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called.
5212      This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely
5213      modify the values of the returned A_loc.
5214 
5215 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
5216 @*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* Accept MATMPIAIJ and derived types whose type name begins with "mpiaij" */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* On one process the diagonal block IS the whole matrix; see the manual page
       for the different semantics of INITIAL (reference) vs REUSE (copy) here */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  /* Raw CSR structure of the diagonal (A) and off-diagonal (B) blocks */
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walked forward row by row below; aav/bav keep the originals for restore */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds diag + off-diag entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* For each row, emit entries in ascending global column order:
       off-diag columns left of the diagonal block, then the diagonal block,
       then the remaining off-diag columns (B's columns are sorted via cmap) */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: columns to the left of the diagonal block */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local columns to global with cstart */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: columns to the right of the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure already exists: only refill the values, in the same
       left-offdiag / diag / right-offdiag order used at creation */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A: columns left of the diagonal block */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A: columns right of the diagonal block */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5321 
5322 /*@
5323      MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
5324           mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part
5325 
5326     Not Collective
5327 
5328    Input Parameters:
5329 +    A - the matrix
5330 -    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5331 
5332    Output Parameters:
5333 +    glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5334 -    A_loc - the local sequential matrix generated
5335 
5336     Level: developer
5337 
5338    Note:
5339      This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering)
5340 
5341 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`
5342 @*/
5343 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc)
5344 {
5345   Mat             Ao, Ad;
5346   const PetscInt *cmap;
5347   PetscMPIInt     size;
5348   PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *);
5349 
5350   PetscFunctionBegin;
5351   PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap));
5352   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
5353   if (size == 1) {
5354     if (scall == MAT_INITIAL_MATRIX) {
5355       PetscCall(PetscObjectReference((PetscObject)Ad));
5356       *A_loc = Ad;
5357     } else if (scall == MAT_REUSE_MATRIX) {
5358       PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN));
5359     }
5360     if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob));
5361     PetscFunctionReturn(PETSC_SUCCESS);
5362   }
5363   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f));
5364   PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
5365   if (f) {
5366     PetscCall((*f)(A, scall, glob, A_loc));
5367   } else {
5368     Mat_SeqAIJ        *a = (Mat_SeqAIJ *)Ad->data;
5369     Mat_SeqAIJ        *b = (Mat_SeqAIJ *)Ao->data;
5370     Mat_SeqAIJ        *c;
5371     PetscInt          *ai = a->i, *aj = a->j;
5372     PetscInt          *bi = b->i, *bj = b->j;
5373     PetscInt          *ci, *cj;
5374     const PetscScalar *aa, *ba;
5375     PetscScalar       *ca;
5376     PetscInt           i, j, am, dn, on;
5377 
5378     PetscCall(MatGetLocalSize(Ad, &am, &dn));
5379     PetscCall(MatGetLocalSize(Ao, NULL, &on));
5380     PetscCall(MatSeqAIJGetArrayRead(Ad, &aa));
5381     PetscCall(MatSeqAIJGetArrayRead(Ao, &ba));
5382     if (scall == MAT_INITIAL_MATRIX) {
5383       PetscInt k;
5384       PetscCall(PetscMalloc1(1 + am, &ci));
5385       PetscCall(PetscMalloc1(ai[am] + bi[am], &cj));
5386       PetscCall(PetscMalloc1(ai[am] + bi[am], &ca));
5387       ci[0] = 0;
5388       for (i = 0, k = 0; i < am; i++) {
5389         const PetscInt ncols_o = bi[i + 1] - bi[i];
5390         const PetscInt ncols_d = ai[i + 1] - ai[i];
5391         ci[i + 1]              = ci[i] + ncols_o + ncols_d;
5392         /* diagonal portion of A */
5393         for (j = 0; j < ncols_d; j++, k++) {
5394           cj[k] = *aj++;
5395           ca[k] = *aa++;
5396         }
5397         /* off-diagonal portion of A */
5398         for (j = 0; j < ncols_o; j++, k++) {
5399           cj[k] = dn + *bj++;
5400           ca[k] = *ba++;
5401         }
5402       }
5403       /* put together the new matrix */
5404       PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc));
5405       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5406       /* Since these are PETSc arrays, change flags to free them as necessary. */
5407       c          = (Mat_SeqAIJ *)(*A_loc)->data;
5408       c->free_a  = PETSC_TRUE;
5409       c->free_ij = PETSC_TRUE;
5410       c->nonew   = 0;
5411       PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name));
5412     } else if (scall == MAT_REUSE_MATRIX) {
5413       PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca));
5414       for (i = 0; i < am; i++) {
5415         const PetscInt ncols_d = ai[i + 1] - ai[i];
5416         const PetscInt ncols_o = bi[i + 1] - bi[i];
5417         /* diagonal portion of A */
5418         for (j = 0; j < ncols_d; j++) *ca++ = *aa++;
5419         /* off-diagonal portion of A */
5420         for (j = 0; j < ncols_o; j++) *ca++ = *ba++;
5421       }
5422       PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca));
5423     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
5424     PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa));
5425     PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa));
5426     if (glob) {
5427       PetscInt cst, *gidx;
5428 
5429       PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL));
5430       PetscCall(PetscMalloc1(dn + on, &gidx));
5431       for (i = 0; i < dn; i++) gidx[i] = cst + i;
5432       for (i = 0; i < on; i++) gidx[i + dn] = cmap[i];
5433       PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob));
5434     }
5435   }
5436   PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
5437   PetscFunctionReturn(PETSC_SUCCESS);
5438 }
5439 
5440 /*@C
5441      MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns
5442 
5443     Not Collective
5444 
5445    Input Parameters:
5446 +    A - the matrix
5447 .    scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5448 -    row, col - index sets of rows and columns to extract (or NULL)
5449 
5450    Output Parameter:
5451 .    A_loc - the local sequential matrix generated
5452 
5453     Level: developer
5454 
5455 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
5456 @*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* Default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* Default column set: all columns with a local nonzero, in ascending global
       order — off-diag columns left of the diagonal block (from garray, which
       is sorted), then the owned columns, then the remaining off-diag columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first off-diag column at or beyond the owned range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices with REUSE expects an existing array of matrices */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* Destroy only the index sets this routine created itself */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5509 
5510 /*
5511  * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
5512  * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
5513  * on a global size.
5514  * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  /* Per owned row, record the (diag, off-diag) nonzero counts and running
   * offsets; entries are interleaved in pairs so one MPIU_2INT bcast moves both */
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we have the relative location of each row's entries */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  /* Accumulate per-leaf-row nonzero counts (pnnz) and totals for the diag and
   * off-diag contributions; ncol tracks the widest row for MatCreateSeqAIJ */
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* Build two SF graphs whose leaves are individual matrix ENTRIES of P_oth:
   * one pulling from P's diagonal block, one from its off-diagonal block */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; the shift is undone below after
   * the broadcast completes, so P's own data is left unchanged */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* Translate off-diag local columns to global in place; mapped back below */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* Restore po->j to local indices; every global index must map back (no drops) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5683 
/*
 * Creates a SeqAIJ matrix by taking the rows of P that correspond to nonzero off-diagonal
 * columns of the local part of A. With dof > 1 (the MAIJ case), every dof consecutive
 * global columns of A collapse onto one row of P.
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;           /* rows: needed global rows of P; map: offdiag column -> compressed row index */
  PetscHMapI  hamp;                /* hash map used to deduplicate the requested row indices */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;             /* star forests composed on *P_oth for broadcasting P's diag/offdiag values */
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   *  and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* with dof > 1, several consecutive columns map to the same row of P */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray is sorted) */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices)); /* keys come out of the hash map in no particular order */
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If the matrix was already created, we simply update values using the SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place; the two broadcasts are overlapped (both Begins before both Ends) */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5760 
5761 /*@C
  MatGetBrowsOfAcols - Returns an `IS` that contains the rows of B that are equal to the nonzero columns of local A
5763 
5764   Collective
5765 
5766   Input Parameters:
5767 + A - the first matrix in `MATMPIAIJ` format
5768 . B - the second matrix in `MATMPIAIJ` format
5769 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5770 
5771   Output Parameters:
5772 + rowb - On input index sets of rows of B to extract (or NULL), modified on output
5773 . colb - On input index sets of columns of B to extract (or NULL), modified on output
5774 - B_seq - the sequential matrix generated
5775 
5776   Level: developer
5777 
5778 @*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL; /* MatCreateSubMatrices() works with an array of matrices */

  PetscFunctionBegin;
  /* A's local column range must coincide with B's local row range for A*B to be well defined */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build a sorted list of the needed global rows of B: off-diagonal columns of A that
       precede the local column range, then the locally owned columns, then the rest.
       a->garray (sorted) holds the global indices of A's off-diagonal columns. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* take every column of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* For reuse, MatCreateSubMatrices() expects the previously created matrix in an array */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller when requested, otherwise destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}
5831 
5832 /*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A
5835 
5836     Collective
5837 
5838    Input Parameters:
5839 +    A,B - the matrices in mpiaij format
5840 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5841 
5842    Output Parameter:
5843 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5844 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5845 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5846 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5847 
    Developer Note:
    This directly accesses information inside the VecScatter associated with the matrix-vector product
    for this matrix. This is not desirable.
5851 
5852     Level: developer
5853 
5854 */
5855 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
5856 {
5857   Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
5858   Mat_SeqAIJ        *b_oth;
5859   VecScatter         ctx;
5860   MPI_Comm           comm;
5861   const PetscMPIInt *rprocs, *sprocs;
5862   const PetscInt    *srow, *rstarts, *sstarts;
5863   PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
5864   PetscInt           i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len;
5865   PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
5866   MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
5867   PetscMPIInt        size, tag, rank, nreqs;
5868 
5869   PetscFunctionBegin;
5870   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
5871   PetscCallMPI(MPI_Comm_size(comm, &size));
5872 
5873   if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) {
5874     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
5875   }
5876   PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
5877   PetscCallMPI(MPI_Comm_rank(comm, &rank));
5878 
5879   if (size == 1) {
5880     startsj_s = NULL;
5881     bufa_ptr  = NULL;
5882     *B_oth    = NULL;
5883     PetscFunctionReturn(PETSC_SUCCESS);
5884   }
5885 
5886   ctx = a->Mvctx;
5887   tag = ((PetscObject)ctx)->tag;
5888 
5889   PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
5890   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5891   PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
5892   PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
5893   PetscCall(PetscMalloc1(nreqs, &reqs));
5894   rwaits = reqs;
5895   swaits = reqs + nrecvs;
5896 
5897   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5898   if (scall == MAT_INITIAL_MATRIX) {
5899     /* i-array */
5900     /*---------*/
5901     /*  post receives */
5902     if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
5903     for (i = 0; i < nrecvs; i++) {
5904       rowlen = rvalues + rstarts[i] * rbs;
5905       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
5906       PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5907     }
5908 
5909     /* pack the outgoing message */
5910     PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));
5911 
5912     sstartsj[0] = 0;
5913     rstartsj[0] = 0;
5914     len         = 0; /* total length of j or a array to be sent */
5915     if (nsends) {
5916       k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5917       PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
5918     }
5919     for (i = 0; i < nsends; i++) {
5920       rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
5921       nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5922       for (j = 0; j < nrows; j++) {
5923         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5924         for (l = 0; l < sbs; l++) {
5925           PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */
5926 
5927           rowlen[j * sbs + l] = ncols;
5928 
5929           len += ncols;
5930           PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
5931         }
5932         k++;
5933       }
5934       PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));
5935 
5936       sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5937     }
5938     /* recvs and sends of i-array are completed */
5939     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5940     PetscCall(PetscFree(svalues));
5941 
5942     /* allocate buffers for sending j and a arrays */
5943     PetscCall(PetscMalloc1(len + 1, &bufj));
5944     PetscCall(PetscMalloc1(len + 1, &bufa));
5945 
5946     /* create i-array of B_oth */
5947     PetscCall(PetscMalloc1(aBn + 2, &b_othi));
5948 
5949     b_othi[0] = 0;
5950     len       = 0; /* total length of j or a array to be received */
5951     k         = 0;
5952     for (i = 0; i < nrecvs; i++) {
5953       rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
5954       nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
5955       for (j = 0; j < nrows; j++) {
5956         b_othi[k + 1] = b_othi[k] + rowlen[j];
5957         PetscCall(PetscIntSumError(rowlen[j], len, &len));
5958         k++;
5959       }
5960       rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5961     }
5962     PetscCall(PetscFree(rvalues));
5963 
5964     /* allocate space for j and a arrays of B_oth */
5965     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
5966     PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));
5967 
5968     /* j-array */
5969     /*---------*/
5970     /*  post receives of j-array */
5971     for (i = 0; i < nrecvs; i++) {
5972       nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
5973       PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
5974     }
5975 
5976     /* pack the outgoing message j-array */
5977     if (nsends) k = sstarts[0];
5978     for (i = 0; i < nsends; i++) {
5979       nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
5980       bufJ  = bufj + sstartsj[i];
5981       for (j = 0; j < nrows; j++) {
5982         row = srow[k++] + B->rmap->range[rank]; /* global row idx */
5983         for (ll = 0; ll < sbs; ll++) {
5984           PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5985           for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
5986           PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
5987         }
5988       }
5989       PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
5990     }
5991 
5992     /* recvs and sends of j-array are completed */
5993     if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
5994   } else if (scall == MAT_REUSE_MATRIX) {
5995     sstartsj = *startsj_s;
5996     rstartsj = *startsj_r;
5997     bufa     = *bufa_ptr;
5998     b_oth    = (Mat_SeqAIJ *)(*B_oth)->data;
5999     PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
6000   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
6001 
6002   /* a-array */
6003   /*---------*/
6004   /*  post receives of a-array */
6005   for (i = 0; i < nrecvs; i++) {
6006     nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
6007     PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
6008   }
6009 
6010   /* pack the outgoing message a-array */
6011   if (nsends) k = sstarts[0];
6012   for (i = 0; i < nsends; i++) {
6013     nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
6014     bufA  = bufa + sstartsj[i];
6015     for (j = 0; j < nrows; j++) {
6016       row = srow[k++] + B->rmap->range[rank]; /* global row idx */
6017       for (ll = 0; ll < sbs; ll++) {
6018         PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6019         for (l = 0; l < ncols; l++) *bufA++ = vals[l];
6020         PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
6021       }
6022     }
6023     PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
6024   }
6025   /* recvs and sends of a-array are completed */
6026   if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
6027   PetscCall(PetscFree(reqs));
6028 
6029   if (scall == MAT_INITIAL_MATRIX) {
6030     /* put together the new matrix */
6031     PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));
6032 
6033     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
6034     /* Since these are PETSc arrays, change flags to free them as necessary. */
6035     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
6036     b_oth->free_a  = PETSC_TRUE;
6037     b_oth->free_ij = PETSC_TRUE;
6038     b_oth->nonew   = 0;
6039 
6040     PetscCall(PetscFree(bufj));
6041     if (!startsj_s || !bufa_ptr) {
6042       PetscCall(PetscFree2(sstartsj, rstartsj));
6043       PetscCall(PetscFree(bufa_ptr));
6044     } else {
6045       *startsj_s = sstartsj;
6046       *startsj_r = rstartsj;
6047       *bufa_ptr  = bufa;
6048     }
6049   } else if (scall == MAT_REUSE_MATRIX) {
6050     PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
6051   }
6052 
6053   PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
6054   PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
6055   PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
6056   PetscFunctionReturn(PETSC_SUCCESS);
6057 }
6058 
6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
6062 #if defined(PETSC_HAVE_MKL_SPARSE)
6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
6064 #endif
6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
6067 #if defined(PETSC_HAVE_ELEMENTAL)
6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
6069 #endif
6070 #if defined(PETSC_HAVE_SCALAPACK)
6071 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
6072 #endif
6073 #if defined(PETSC_HAVE_HYPRE)
6074 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
6075 #endif
6076 #if defined(PETSC_HAVE_CUDA)
6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
6078 #endif
6079 #if defined(PETSC_HAVE_HIP)
6080 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
6081 #endif
6082 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
6084 #endif
6085 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
6086 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
6087 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
6088 
6089 /*
6090     Computes (B'*A')' since computing B*A directly is untenable
6091 
6092                n                       p                          p
6093         [             ]       [             ]         [                 ]
6094       m [      A      ]  *  n [       B     ]   =   m [         C       ]
6095         [             ]       [             ]         [                 ]
6096 
6097 */
6098 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
6099 {
6100   Mat At, Bt, Ct;
6101 
6102   PetscFunctionBegin;
6103   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
6104   PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
6105   PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
6106   PetscCall(MatDestroy(&At));
6107   PetscCall(MatDestroy(&Bt));
6108   PetscCall(MatTransposeSetPrecursor(Ct, C));
6109   PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
6110   PetscCall(MatDestroy(&Ct));
6111   PetscFunctionReturn(PETSC_SUCCESS);
6112 }
6113 
6114 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
6115 {
6116   PetscBool cisdense;
6117 
6118   PetscFunctionBegin;
6119   PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
6120   PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
6121   PetscCall(MatSetBlockSizesFromMats(C, A, B));
6122   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
6123   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
6124   PetscCall(MatSetUp(C));
6125 
6126   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
6127   PetscFunctionReturn(PETSC_SUCCESS);
6128 }
6129 
6130 /* ----------------------------------------------------------------*/
6131 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6132 {
6133   Mat_Product *product = C->product;
6134   Mat          A = product->A, B = product->B;
6135 
6136   PetscFunctionBegin;
6137   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6138     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
6139 
6140   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6141   C->ops->productsymbolic = MatProductSymbolic_AB;
6142   PetscFunctionReturn(PETSC_SUCCESS);
6143 }
6144 
6145 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6146 {
6147   Mat_Product *product = C->product;
6148 
6149   PetscFunctionBegin;
6150   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
6151   PetscFunctionReturn(PETSC_SUCCESS);
6152 }
6153 
6154 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix
6155 
6156   Input Parameters:
6157 
6158     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
6159     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)
6160 
6161     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat
6162 
6163     For Set1, j1[] contains column indices of the nonzeros.
6164     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
6166     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.
6167 
6168     Similar for Set2.
6169 
6170     This routine merges the two sets of nonzeros row by row and removes repeats.
6171 
6172   Output Parameters: (memory is allocated by the caller)
6173 
6174     i[],j[]: the CSR of the merged matrix, which has m rows.
6175     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
6176     imap2[]: similar to imap1[], but for Set2.
6177     Note we order nonzeros row-by-row and from left to right.
6178 */
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge of the (sorted) unique column indices of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero only present in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Nonzero only present in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one loop body runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix ends at position t */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
6236 
6237 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block
6238 
6239   Input Parameters:
6240     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
6241     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
6242       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.
6243 
6244       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
6245       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.
6246 
6247   Output Parameters:
6248     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
6249     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
6250       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
6251       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.
6252 
6253     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
6254       Atot: number of entries belonging to the diagonal block.
6255       Annz: number of unique nonzeros belonging to the diagonal block.
6256       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
6257         repeats (i.e., same 'i,j' pair).
6258       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
6259         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.
6260 
6261       Atot: number of entries belonging to the diagonal block
6262       Annz: number of unique nonzeros belonging to the diagonal block.
6263 
6264     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.
6265 
6266     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
6267 */
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart; /* number of local rows */

  /* i[] is sorted, so entries to be ignored (negative rows) are all at the front */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1]  */
      /* NOTE(review): the bound below allows j[p] == mat->cmap->N, while valid 0-based columns are < N -- confirm intended */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the shifted diagonal indices back to the original columns */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* the counters are reused as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}
6376 
6377 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix
6378 
6379   Input Parameters:
6380     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
6381     nnz:  number of unique nonzeros in the merged matrix
6382     imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set
6384 
6385   Output Parameter: (memory is allocated by the caller)
6386     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set
6387 
6388   Example:
6389     nnz1 = 4
6390     nnz  = 6
6391     imap = [1,3,4,5]
6392     jmap = [0,3,5,6,7]
6393    then,
6394     jmap_new = [0,0,3,3,5,6,7]
6395 */
/* Expand jmap[] of nnz1 unique nonzeros to jmap_new[] of nnz unique nonzeros of the merged
   matrix, using imap[] to place each original entry; positions not hit by imap[] get a
   zero-width interval (their repeat count is 0). Filled right-to-left so each jmap_new[p]
   is written exactly once. */
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz;                /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    /* Every slot above imap[k] (down to the previous imap hit) takes the end offset of entry k */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Remaining slots (below imap[0]) take the base offset; presumably jmap[0] == 0 as in the example above -- TODO confirm */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}
6409 
6410 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
6411 {
6412   MPI_Comm    comm;
6413   PetscMPIInt rank, size;
6414   PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
6415   PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
6416   Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;
6417 
6418   PetscFunctionBegin;
6419   PetscCall(PetscFree(mpiaij->garray));
6420   PetscCall(VecDestroy(&mpiaij->lvec));
6421 #if defined(PETSC_USE_CTABLE)
6422   PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
6423 #else
6424   PetscCall(PetscFree(mpiaij->colmap));
6425 #endif
6426   PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
6427   mat->assembled     = PETSC_FALSE;
6428   mat->was_assembled = PETSC_FALSE;
6429   PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));
6430 
6431   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
6432   PetscCallMPI(MPI_Comm_size(comm, &size));
6433   PetscCallMPI(MPI_Comm_rank(comm, &rank));
6434   PetscCall(PetscLayoutSetUp(mat->rmap));
6435   PetscCall(PetscLayoutSetUp(mat->cmap));
6436   PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
6437   PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
6438   PetscCall(MatGetLocalSize(mat, &m, &n));
6439   PetscCall(MatGetSize(mat, &M, &N));
6440 
6441   /* ---------------------------------------------------------------------------*/
6442   /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
6443   /* entries come first, then local rows, then remote rows.                     */
6444   /* ---------------------------------------------------------------------------*/
6445   PetscCount n1 = coo_n, *perm1;
6446   PetscInt  *i1 = coo_i, *j1 = coo_j;
6447 
6448   PetscCall(PetscMalloc1(n1, &perm1));
6449   for (k = 0; k < n1; k++) perm1[k] = k;
6450 
6451   /* Manipulate indices so that entries with negative row or col indices will have smallest
6452      row indices, local entries will have greater but negative row indices, and remote entries
6453      will have positive row indices.
6454   */
6455   for (k = 0; k < n1; k++) {
6456     if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
6457     else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
6458     else {
6459       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
6460       if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
6461     }
6462   }
6463 
6464   /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
6465   PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
6466   for (k = 0; k < n1; k++) {
6467     if (i1[k] > PETSC_MIN_INT) break;
6468   }                                                                               /* Advance k to the first entry we need to take care of */
6469   PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
6470   for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/
6471 
6472   /* ---------------------------------------------------------------------------*/
6473   /*           Split local rows into diag/offdiag portions                      */
6474   /* ---------------------------------------------------------------------------*/
6475   PetscCount *rowBegin1, *rowMid1, *rowEnd1;
6476   PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
6477   PetscCount  Annz1, Bnnz1, Atot1, Btot1;
6478 
6479   PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
6480   PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
6481   PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
6482 
6483   /* ---------------------------------------------------------------------------*/
6484   /*           Send remote rows to their owner                                  */
6485   /* ---------------------------------------------------------------------------*/
6486   /* Find which rows should be sent to which remote ranks*/
6487   PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
6488   PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
6489   PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
6490   const PetscInt *ranges;
6491   PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */
6492 
6493   PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
6494   PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
6495   for (k = rem; k < n1;) {
6496     PetscMPIInt owner;
6497     PetscInt    firstRow, lastRow;
6498 
6499     /* Locate a row range */
6500     firstRow = i1[k]; /* first row of this owner */
6501     PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
6502     lastRow = ranges[owner + 1] - 1; /* last row of this owner */
6503 
6504     /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
6505     PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));
6506 
6507     /* All entries in [k,p) belong to this remote owner */
6508     if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
6509       PetscMPIInt *sendto2;
6510       PetscInt    *nentries2;
6511       PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;
6512 
6513       PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
6514       PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
6515       PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
6516       PetscCall(PetscFree2(sendto, nentries2));
6517       sendto   = sendto2;
6518       nentries = nentries2;
6519       maxNsend = maxNsend2;
6520     }
6521     sendto[nsend]   = owner;
6522     nentries[nsend] = p - k;
6523     PetscCall(PetscCountCast(p - k, &nentries[nsend]));
6524     nsend++;
6525     k = p;
6526   }
6527 
6528   /* Build 1st SF to know offsets on remote to send data */
6529   PetscSF      sf1;
6530   PetscInt     nroots = 1, nroots2 = 0;
6531   PetscInt     nleaves = nsend, nleaves2 = 0;
6532   PetscInt    *offsets;
6533   PetscSFNode *iremote;
6534 
6535   PetscCall(PetscSFCreate(comm, &sf1));
6536   PetscCall(PetscMalloc1(nsend, &iremote));
6537   PetscCall(PetscMalloc1(nsend, &offsets));
6538   for (k = 0; k < nsend; k++) {
6539     iremote[k].rank  = sendto[k];
6540     iremote[k].index = 0;
6541     nleaves2 += nentries[k];
6542     PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
6543   }
6544   PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6545   PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
6546   PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
6547   PetscCall(PetscSFDestroy(&sf1));
6548   PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);
6549 
6550   /* Build 2nd SF to send remote COOs to their owner */
6551   PetscSF sf2;
6552   nroots  = nroots2;
6553   nleaves = nleaves2;
6554   PetscCall(PetscSFCreate(comm, &sf2));
6555   PetscCall(PetscSFSetFromOptions(sf2));
6556   PetscCall(PetscMalloc1(nleaves, &iremote));
6557   p = 0;
6558   for (k = 0; k < nsend; k++) {
6559     PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
6560     for (q = 0; q < nentries[k]; q++, p++) {
6561       iremote[p].rank  = sendto[k];
6562       iremote[p].index = offsets[k] + q;
6563     }
6564   }
6565   PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
6566 
6567   /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
6568   PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));
6569 
6570   /* Send the remote COOs to their owner */
6571   PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
6572   PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
6573   PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
6574   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
6575   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
6576   PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
6577   PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));
6578 
6579   PetscCall(PetscFree(offsets));
6580   PetscCall(PetscFree2(sendto, nentries));
6581 
6582   /* ---------------------------------------------------------------*/
6583   /* Sort received COOs by row along with the permutation array     */
6584   /* ---------------------------------------------------------------*/
6585   for (k = 0; k < n2; k++) perm2[k] = k;
6586   PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));
6587 
6588   /* ---------------------------------------------------------------*/
6589   /* Split received COOs into diag/offdiag portions                 */
6590   /* ---------------------------------------------------------------*/
6591   PetscCount *rowBegin2, *rowMid2, *rowEnd2;
6592   PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
6593   PetscCount  Annz2, Bnnz2, Atot2, Btot2;
6594 
6595   PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
6596   PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));
6597 
6598   /* --------------------------------------------------------------------------*/
6599   /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
6600   /* --------------------------------------------------------------------------*/
6601   PetscInt *Ai, *Bi;
6602   PetscInt *Aj, *Bj;
6603 
6604   PetscCall(PetscMalloc1(m + 1, &Ai));
6605   PetscCall(PetscMalloc1(m + 1, &Bi));
6606   PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
6607   PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));
6608 
6609   PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
6610   PetscCall(PetscMalloc1(Annz1, &Aimap1));
6611   PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
6612   PetscCall(PetscMalloc1(Annz2, &Aimap2));
6613   PetscCall(PetscMalloc1(Bnnz2, &Bimap2));
6614 
6615   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
6616   PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));
6617 
6618   /* --------------------------------------------------------------------------*/
6619   /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
6620   /* expect nonzeros in A/B most likely have local contributing entries        */
6621   /* --------------------------------------------------------------------------*/
6622   PetscInt    Annz = Ai[m];
6623   PetscInt    Bnnz = Bi[m];
6624   PetscCount *Ajmap1_new, *Bjmap1_new;
6625 
6626   PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
6627   PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));
6628 
6629   PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
6630   PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));
6631 
6632   PetscCall(PetscFree(Aimap1));
6633   PetscCall(PetscFree(Ajmap1));
6634   PetscCall(PetscFree(Bimap1));
6635   PetscCall(PetscFree(Bjmap1));
6636   PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
6637   PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
6638   PetscCall(PetscFree(perm1));
6639   PetscCall(PetscFree3(i2, j2, perm2));
6640 
6641   Ajmap1 = Ajmap1_new;
6642   Bjmap1 = Bjmap1_new;
6643 
6644   /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
6645   if (Annz < Annz1 + Annz2) {
6646     PetscInt *Aj_new;
6647     PetscCall(PetscMalloc1(Annz, &Aj_new));
6648     PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
6649     PetscCall(PetscFree(Aj));
6650     Aj = Aj_new;
6651   }
6652 
6653   if (Bnnz < Bnnz1 + Bnnz2) {
6654     PetscInt *Bj_new;
6655     PetscCall(PetscMalloc1(Bnnz, &Bj_new));
6656     PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
6657     PetscCall(PetscFree(Bj));
6658     Bj = Bj_new;
6659   }
6660 
6661   /* --------------------------------------------------------------------------------*/
6662   /* Create new submatrices for on-process and off-process coupling                  */
6663   /* --------------------------------------------------------------------------------*/
6664   PetscScalar *Aa, *Ba;
6665   MatType      rtype;
6666   Mat_SeqAIJ  *a, *b;
6667   PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
6668   PetscCall(PetscCalloc1(Bnnz, &Ba));
6669   /* make Aj[] local, i.e, based off the start column of the diagonal portion */
6670   if (cstart) {
6671     for (k = 0; k < Annz; k++) Aj[k] -= cstart;
6672   }
6673   PetscCall(MatDestroy(&mpiaij->A));
6674   PetscCall(MatDestroy(&mpiaij->B));
6675   PetscCall(MatGetRootType_Private(mat, &rtype));
6676   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
6677   PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
6678   PetscCall(MatSetUpMultiply_MPIAIJ(mat));
6679 
6680   a               = (Mat_SeqAIJ *)mpiaij->A->data;
6681   b               = (Mat_SeqAIJ *)mpiaij->B->data;
6682   a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */
6683   a->free_a = b->free_a = PETSC_TRUE;
6684   a->free_ij = b->free_ij = PETSC_TRUE;
6685 
6686   /* conversion must happen AFTER multiply setup */
6687   PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
6688   PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
6689   PetscCall(VecDestroy(&mpiaij->lvec));
6690   PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));
6691 
6692   mpiaij->coo_n   = coo_n;
6693   mpiaij->coo_sf  = sf2;
6694   mpiaij->sendlen = nleaves;
6695   mpiaij->recvlen = nroots;
6696 
6697   mpiaij->Annz = Annz;
6698   mpiaij->Bnnz = Bnnz;
6699 
6700   mpiaij->Annz2 = Annz2;
6701   mpiaij->Bnnz2 = Bnnz2;
6702 
6703   mpiaij->Atot1 = Atot1;
6704   mpiaij->Atot2 = Atot2;
6705   mpiaij->Btot1 = Btot1;
6706   mpiaij->Btot2 = Btot2;
6707 
6708   mpiaij->Ajmap1 = Ajmap1;
6709   mpiaij->Aperm1 = Aperm1;
6710 
6711   mpiaij->Bjmap1 = Bjmap1;
6712   mpiaij->Bperm1 = Bperm1;
6713 
6714   mpiaij->Aimap2 = Aimap2;
6715   mpiaij->Ajmap2 = Ajmap2;
6716   mpiaij->Aperm2 = Aperm2;
6717 
6718   mpiaij->Bimap2 = Bimap2;
6719   mpiaij->Bjmap2 = Bjmap2;
6720   mpiaij->Bperm2 = Bperm2;
6721 
6722   mpiaij->Cperm1 = Cperm1;
6723 
6724   /* Allocate in preallocation. If not used, it has zero cost on host */
6725   PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
6726   PetscFunctionReturn(PETSC_SUCCESS);
6727 }
6728 
6729 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
6730 {
6731   Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
6732   Mat               A = mpiaij->A, B = mpiaij->B;
6733   PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
6734   PetscScalar      *Aa, *Ba;
6735   PetscScalar      *sendbuf = mpiaij->sendbuf;
6736   PetscScalar      *recvbuf = mpiaij->recvbuf;
6737   const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
6738   const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
6739   const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
6740   const PetscCount *Cperm1 = mpiaij->Cperm1;
6741 
6742   PetscFunctionBegin;
6743   PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
6744   PetscCall(MatSeqAIJGetArray(B, &Ba));
6745 
6746   /* Pack entries to be sent to remote */
6747   for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];
6748 
6749   /* Send remote entries to their owner and overlap the communication with local computation */
6750   PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
6751   /* Add local entries to A and B */
6752   for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
6753     PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
6754     for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
6755     Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
6756   }
6757   for (PetscCount i = 0; i < Bnnz; i++) {
6758     PetscScalar sum = 0.0;
6759     for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
6760     Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
6761   }
6762   PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));
6763 
6764   /* Add received remote entries to A and B */
6765   for (PetscCount i = 0; i < Annz2; i++) {
6766     for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
6767   }
6768   for (PetscCount i = 0; i < Bnnz2; i++) {
6769     for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
6770   }
6771   PetscCall(MatSeqAIJRestoreArray(A, &Aa));
6772   PetscCall(MatSeqAIJRestoreArray(B, &Ba));
6773   PetscFunctionReturn(PETSC_SUCCESS);
6774 }
6775 
6776 /*MC
6777    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6778 
6779    Options Database Keys:
6780 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`
6781 
6782    Level: beginner
6783 
6784    Notes:
6785     `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
6786     in this case the values associated with the rows and columns one passes in are set to zero
6787     in the matrix
6788 
    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
6790     space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
6791 
6792 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
6793 M*/
6794 
/* MatCreate_MPIAIJ - Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data structure,
   installs the operations table, creates the stash used to buffer off-process entries, and
   registers the composed functions and type conversions this implementation supports. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL; /* global-to-local column map for the offdiag block; built lazily */
  b->garray      = NULL; /* global column indices of the offdiag block; built lazily */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Composed functions, looked up by name via PetscObjectQueryFunction() elsewhere in PETSc;
     conversion registrations are guarded by the availability of the target package */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}
6877 
6878 /*@C
6879      MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
6880          and "off-diagonal" part of the matrix in CSR format.
6881 
6882    Collective
6883 
6884    Input Parameters:
6885 +  comm - MPI communicator
6886 .  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if n is given)
6892 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6893 .   j - column indices, which must be local, i.e., based off the start column of the diagonal portion
6894 .   a - matrix values
6895 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6896 .   oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
6897 -   oa - matrix values
6898 
6899    Output Parameter:
6900 .   mat - the matrix
6901 
6902    Level: advanced
6903 
6904    Notes:
6905        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6906        must free the arrays once the matrix has been destroyed and not before.
6907 
6908        The i and j indices are 0 based
6909 
6910        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6911 
6912        This sets local rows and cannot be used to set off-processor values.
6913 
6914        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6915        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6916        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6917        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6918        keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
6919        communication if it is known that only local entries will be set.
6920 
6921 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
6922           `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
6923 @*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Reject PETSC_DECIDE/negative m and malformed row-offset arrays up front */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* The user-provided arrays are adopted (not copied), so no preallocation is needed */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays as the diagonal (A) and off-diagonal (B) sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Assemble with communication disabled: values are already in place, and this routine
     sets only local rows, so assembly must not try to stash off-process entries */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); /* nonzero pattern is fixed by the provided arrays */
  PetscFunctionReturn(PETSC_SUCCESS);
}
6952 
/* Product context used by the MPIAIJ backend mat-mat multiplication implementations.
   It caches the intermediate products and communication data created in the symbolic
   phase so that the numeric phase can be repeated without re-deriving them. Freed by
   MatDestroy_MatMatMPIAIJBACKEND(). */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;         /* memory type of the SF-allocated coo_v/coo_w buffers */

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;
6983 
6984 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6985 {
6986   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
6987   PetscInt             i;
6988 
6989   PetscFunctionBegin;
6990   PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
6991   PetscCall(PetscFree(mmdata->bufa));
6992   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
6993   PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
6994   PetscCall(MatDestroy(&mmdata->P_oth));
6995   PetscCall(MatDestroy(&mmdata->Bloc));
6996   PetscCall(PetscSFDestroy(&mmdata->sf));
6997   for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
6998   PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
6999   PetscCall(PetscFree(mmdata->own[0]));
7000   PetscCall(PetscFree(mmdata->own));
7001   PetscCall(PetscFree(mmdata->off[0]));
7002   PetscCall(PetscFree(mmdata->off));
7003   PetscCall(PetscFree(mmdata));
7004   PetscFunctionReturn(PETSC_SUCCESS);
7005 }
7006 
7007 /* Copy selected n entries with indices in idx[] of A to v[].
7008    If idx is NULL, copy the whole data array of A to v[]
7009  */
7010 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
7011 {
7012   PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);
7013 
7014   PetscFunctionBegin;
7015   PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
7016   if (f) {
7017     PetscCall((*f)(A, n, idx, v));
7018   } else {
7019     const PetscScalar *vv;
7020 
7021     PetscCall(MatSeqAIJGetArrayRead(A, &vv));
7022     if (n && idx) {
7023       PetscScalar    *w  = v;
7024       const PetscInt *oi = idx;
7025       PetscInt        j;
7026 
7027       for (j = 0; j < n; j++) *w++ = vv[*oi++];
7028     } else {
7029       PetscCall(PetscArraycpy(v, vv, n));
7030     }
7031     PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
7032   }
7033   PetscFunctionReturn(PETSC_SUCCESS);
7034 }
7035 
/* Numeric phase of the backend matrix product: refresh the temporary matrices,
   recompute the intermediate sequential products mp[], pack their values into the
   COO value arrays laid out by MatProductSymbolic_MPIAIJBACKEND(), scatter the
   off-process entries, and insert everything into C with MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (local) and coo_w (to-be-sent) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym is only true right after the symbolic phase (when the temporaries are
     already up to date); subsequent numeric calls must refresh them */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* pack values: coo_v receives locally-owned entries, coo_w entries destined for
     other ranks (indexed by the off[]/own[] tables built in the symbolic phase) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* purely intermediate product, values consumed by a later mp[] */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* all entries of this product are locally owned: copy the whole value array */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* gather remote contributions at the tail of coo_v, matching the COO layout */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7080 
7081 /* Support for Pt * A, A * P, or Pt * A * P */
7082 #define MAX_NUMBER_INTERMEDIATE 4
/*
   Symbolic phase of the backend (device-friendly) implementation of AB, AtB and
   PtAP products of MPIAIJ matrices.

   The global product is decomposed into at most MAX_NUMBER_INTERMEDIATE sequential
   products mp[] of local diagonal/off-diagonal blocks. For each mp[i] we record how
   its local row/column indices map to global indices of C (rmapt/cmapt with the
   index tables rmapa/cmapa), build the COO pattern of C from those maps, and create
   a PetscSF to communicate entries that must be inserted on remote processes.
   The numeric phase (MatProductNumeric_MPIAIJBACKEND) then only recomputes the
   mp[] products and calls MatSetValuesCOO().
*/
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
                                                                                         /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE];       /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* with a symmetric A, A^t*B can be computed as the cheaper A*B */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine the local/global sizes of C and whether entries may land on other processes */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* sequential run: every entry is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the list of intermediate sequential products mp[0..cp-1] */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product, not part of C directly */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d:    # of nonzeros of matrices that do not have offproc entries
    ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* build the SF from the global row ids of the to-be-sent entries; its multi-SF
       tells us how many entries (ncoo2) other processes will insert here */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF keeps the numeric phase uniform (PetscSFMalloc/PetscSFFree still work) */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) {                        /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else {                                            /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7577 
/* Decide whether the backend product implementation should handle this MatProduct.

   With device support compiled in, the backend symbolic routine is installed only
   when A and B have the same type, neither operand is bound to the CPU, and the
   user did not request the CPU path via the -*_backend_cpu options; otherwise we
   fall back to the standard MPIAIJ product selection. Without device support the
   backend is never selected here (match starts PETSC_TRUE only to keep the shared
   code path below, and the final dispatch still falls through to MPIAIJ when no
   symbolic op is set). */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* operands must share the same (device) type and live on the device */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7648 
7649 /*
7650    Produces a set of block column indices of the matrix row, one for each block represented in the original row
7651 
7652    n - the number of block indices in cc[]
7653    cc - the block indices (must be large enough to contain the indices)
7654 */
7655 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
7656 {
7657   PetscInt        cnt = -1, nidx, j;
7658   const PetscInt *idx;
7659 
7660   PetscFunctionBegin;
7661   PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
7662   if (nidx) {
7663     cnt     = 0;
7664     cc[cnt] = idx[0] / bs;
7665     for (j = 1; j < nidx; j++) {
7666       if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
7667     }
7668   }
7669   PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
7670   *n = cnt + 1;
7671   PetscFunctionReturn(PETSC_SUCCESS);
7672 }
7673 
7674 /*
7675     Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows
7676 
7677     ncollapsed - the number of block indices
7678     collapsed - the block indices (must be large enough to contain the indices)
7679 */
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* seed the running union with the block-column indices of the first row of the block */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    /* collapse the next row of the block, then merge it into the running union.
       NOTE(review): this relies on PetscMergeIntArray() writing into the supplied
       buffer (merged is never NULL here) and on the output count being allowed to
       alias the input count nprev — confirm against its implementation */
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* ping-pong the two scratch buffers so the merged result becomes "previous" */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  /* the final union lives in one of the caller-provided workspaces (w0 or w2) */
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}
7697 
7698 /*
7699    This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
7700 */
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  /* the filtered matrix gets the same type as the input graph */
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
               Also, if the matrix is symmetric, can we skip this
               operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  /* a = local diagonal block, b = off-diagonal block (NULL in the sequential case),
     garray maps b's compressed column indices back to global columns */
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a             = d->A;
    b             = d->B;
    garray        = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols; /* track widest row for the AA/AJ scratch buffers */
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  /* only one of the two preallocation calls takes effect, depending on tGmat's type */
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  /* nnz0 counts all entries visited, nnz1 counts entries kept (|value| > vfilter) */
  nnz0 = nnz1 = 0;
  /* first pass over the diagonal block a, second (if present) over the off-diagonal block b */
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          /* translate local column index to global: shift by Istart for the diagonal
             block, look up garray for the off-diagonal block */
          PetscInt cid = idx[jj] + Istart; //diag
          if (c != a) cid = garray[idx[jj]];
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}
7792 
/*
 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

 Input Parameters:
 + Amat - matrix
 . symmetrize - make the result symmetric
 . scale - scale with diagonal
 - filter - filter out small (absolute) values; not applied when negative

 Output Parameter:
 . a_Gmat - output scalar graph >= 0

 */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* bs > 1: collapse each bs x bs block of Amat into one scalar entry of Gmat */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* fast path: assumes blocks are dense; falls through to old_bs below if a
         non-dense block is detected while counting */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a             = d->A;
        b             = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* count block nonzeros per block row (d_nnz for a, o_nnz for b) and verify
         that every block is fully dense: all bs rows of a block must have the same
         number of columns (a multiple of bs) and the same first column */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0;
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0;
            if (nnz[brow / bs] != jj / bs) ok = 0;
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            /* non-dense block found: abandon the fast path */
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        /* access the SeqAIJ CSR arrays of the diagonal block directly */
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai               = aseq->i;
        n                = ai[brow + 1] - ai[brow];
        aj               = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {         // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first pass over the leading row of the block: record the global block
             column indices (via garray) and zero the accumulators */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second pass: accumulate |value| over the whole bs x bs block */
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      /* slow path: works for arbitrary (non-dense) block structure */
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
       */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
         */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
         */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; /* clamp to number of off-process block columns */
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* ADD_VALUES accumulates |a_ij| over each bs x bs block into one scalar entry */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is Amat itself (copied only if we need to modify it) */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a             = d->A;
      b             = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;
        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      /* G <- G + G^T makes the sparsity (and values) symmetric */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));

  if (filter >= 0) {
    /* drop entries with |value| <= filter; replaces Gmat with the filtered copy */
    Mat Fmat = NULL; /* some silly compiler needs this */

    PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat));
    PetscCall(MatDestroy(&Gmat));
    Gmat = Fmat;
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
8048 
8049 /*
8050     Special version for direct calls from Fortran
8051 */
8052 #include <petsc/private/fortranimpl.h>
8053 
8054 /* Change these macros so can be used in void function */
8055 /* Identical to PetscCallVoid, except it assigns to *_ierr */
8056 #undef PetscCall
8057 #define PetscCall(...) \
8058   do { \
8059     PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
8060     if (PetscUnlikely(ierr_msv_mpiaij)) { \
8061       *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
8062       return; \
8063     } \
8064   } while (0)
8065 
8066 #undef SETERRQ
8067 #define SETERRQ(comm, ierr, ...) \
8068   do { \
8069     *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
8070     return; \
8071   } while (0)
8072 
8073 #if defined(PETSC_HAVE_FORTRAN_CAPS)
8074   #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
8075 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
8076   #define matsetvaluesmpiaij_ matsetvaluesmpiaij
8077 #else
8078 #endif
/* Fortran-callable direct version of MatSetValues() for MPIAIJ; inlines the
   insertion into the diagonal (A) and off-diagonal (B) SeqAIJ blocks via the
   MatSetValues_SeqAIJ_{A,B}_Private() macros. Errors are reported through *_ierr
   using the PetscCall/SETERRQ redefinitions above. */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative rows are silently ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up search state for both the A and B row */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to B's
               compressed (local) index through the colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not present in B's pattern: disassemble so new off-process
                   columns can be inserted, then refresh all cached B pointers */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}
8192 
8193 /* Undefining these here since they were redefined from their original definition above! No
8194  * other PETSc functions should be defined past this point, as it is impossible to recover the
8195  * original definitions */
8196 #undef PetscCall
8197 #undef SETERRQ
8198