xref: /petsc/src/mat/impls/nest/matnest.c (revision 11a5261e40035b7c793f2783a2ba6c7cd4f3b077)
1 #include <../src/mat/impls/nest/matnestimpl.h> /*I   "petscmat.h"   I*/
2 #include <../src/mat/impls/aij/seq/aij.h>
3 #include <petscsf.h>
4 
5 static PetscErrorCode MatSetUp_NestIS_Private(Mat, PetscInt, const IS[], PetscInt, const IS[]);
6 static PetscErrorCode MatCreateVecs_Nest(Mat, Vec *, Vec *);
7 static PetscErrorCode MatReset_Nest(Mat);
8 
9 PETSC_INTERN PetscErrorCode MatConvert_Nest_IS(Mat, MatType, MatReuse, Mat *);
10 
11 /* private functions */
12 static PetscErrorCode MatNestGetSizes_Private(Mat A, PetscInt *m, PetscInt *n, PetscInt *M, PetscInt *N) {
13   Mat_Nest *bA = (Mat_Nest *)A->data;
14   PetscInt  i, j;
15 
16   PetscFunctionBegin;
17   *m = *n = *M = *N = 0;
18   for (i = 0; i < bA->nr; i++) { /* rows */
19     PetscInt sm, sM;
20     PetscCall(ISGetLocalSize(bA->isglobal.row[i], &sm));
21     PetscCall(ISGetSize(bA->isglobal.row[i], &sM));
22     *m += sm;
23     *M += sM;
24   }
25   for (j = 0; j < bA->nc; j++) { /* cols */
26     PetscInt sn, sN;
27     PetscCall(ISGetLocalSize(bA->isglobal.col[j], &sn));
28     PetscCall(ISGetSize(bA->isglobal.col[j], &sN));
29     *n += sn;
30     *N += sN;
31   }
32   PetscFunctionReturn(0);
33 }
34 
35 /* operations */
36 static PetscErrorCode MatMult_Nest(Mat A, Vec x, Vec y) {
37   Mat_Nest *bA = (Mat_Nest *)A->data;
38   Vec      *bx = bA->right, *by = bA->left;
39   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
40 
41   PetscFunctionBegin;
42   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(y, bA->isglobal.row[i], &by[i]));
43   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(x, bA->isglobal.col[i], &bx[i]));
44   for (i = 0; i < nr; i++) {
45     PetscCall(VecZeroEntries(by[i]));
46     for (j = 0; j < nc; j++) {
47       if (!bA->m[i][j]) continue;
48       /* y[i] <- y[i] + A[i][j] * x[j] */
49       PetscCall(MatMultAdd(bA->m[i][j], bx[j], by[i], by[i]));
50     }
51   }
52   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(y, bA->isglobal.row[i], &by[i]));
53   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.col[i], &bx[i]));
54   PetscFunctionReturn(0);
55 }
56 
57 static PetscErrorCode MatMultAdd_Nest(Mat A, Vec x, Vec y, Vec z) {
58   Mat_Nest *bA = (Mat_Nest *)A->data;
59   Vec      *bx = bA->right, *bz = bA->left;
60   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
61 
62   PetscFunctionBegin;
63   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(z, bA->isglobal.row[i], &bz[i]));
64   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(x, bA->isglobal.col[i], &bx[i]));
65   for (i = 0; i < nr; i++) {
66     if (y != z) {
67       Vec by;
68       PetscCall(VecGetSubVector(y, bA->isglobal.row[i], &by));
69       PetscCall(VecCopy(by, bz[i]));
70       PetscCall(VecRestoreSubVector(y, bA->isglobal.row[i], &by));
71     }
72     for (j = 0; j < nc; j++) {
73       if (!bA->m[i][j]) continue;
74       /* y[i] <- y[i] + A[i][j] * x[j] */
75       PetscCall(MatMultAdd(bA->m[i][j], bx[j], bz[i], bz[i]));
76     }
77   }
78   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(z, bA->isglobal.row[i], &bz[i]));
79   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.col[i], &bx[i]));
80   PetscFunctionReturn(0);
81 }
82 
83 typedef struct {
84   Mat         *workC;      /* array of Mat with specific containers depending on the underlying MatMatMult implementation */
85   PetscScalar *tarray;     /* buffer for storing all temporary products A[i][j] B[j] */
86   PetscInt    *dm, *dn, k; /* displacements and number of submatrices */
87 } Nest_Dense;
88 
89 PETSC_INTERN PetscErrorCode MatProductNumeric_Nest_Dense(Mat C) {
90   Mat_Nest          *bA;
91   Nest_Dense        *contents;
92   Mat                viewB, viewC, productB, workC;
93   const PetscScalar *barray;
94   PetscScalar       *carray;
95   PetscInt           i, j, M, N, nr, nc, ldb, ldc;
96   Mat                A, B;
97 
98   PetscFunctionBegin;
99   MatCheckProduct(C, 3);
100   A = C->product->A;
101   B = C->product->B;
102   PetscCall(MatGetSize(B, NULL, &N));
103   if (!N) {
104     PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
105     PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
106     PetscFunctionReturn(0);
107   }
108   contents = (Nest_Dense *)C->product->data;
109   PetscCheck(contents, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
110   bA = (Mat_Nest *)A->data;
111   nr = bA->nr;
112   nc = bA->nc;
113   PetscCall(MatDenseGetLDA(B, &ldb));
114   PetscCall(MatDenseGetLDA(C, &ldc));
115   PetscCall(MatZeroEntries(C));
116   PetscCall(MatDenseGetArrayRead(B, &barray));
117   PetscCall(MatDenseGetArray(C, &carray));
118   for (i = 0; i < nr; i++) {
119     PetscCall(ISGetSize(bA->isglobal.row[i], &M));
120     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), contents->dm[i + 1] - contents->dm[i], PETSC_DECIDE, M, N, carray + contents->dm[i], &viewC));
121     PetscCall(MatDenseSetLDA(viewC, ldc));
122     for (j = 0; j < nc; j++) {
123       if (!bA->m[i][j]) continue;
124       PetscCall(ISGetSize(bA->isglobal.col[j], &M));
125       PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), contents->dn[j + 1] - contents->dn[j], PETSC_DECIDE, M, N, (PetscScalar *)(barray + contents->dn[j]), &viewB));
126       PetscCall(MatDenseSetLDA(viewB, ldb));
127 
128       /* MatMatMultNumeric(bA->m[i][j],viewB,contents->workC[i*nc + j]); */
129       workC             = contents->workC[i * nc + j];
130       productB          = workC->product->B;
131       workC->product->B = viewB; /* use newly created dense matrix viewB */
132       PetscCall(MatProductNumeric(workC));
133       PetscCall(MatDestroy(&viewB));
134       workC->product->B = productB; /* resume original B */
135 
136       /* C[i] <- workC + C[i] */
137       PetscCall(MatAXPY(viewC, 1.0, contents->workC[i * nc + j], SAME_NONZERO_PATTERN));
138     }
139     PetscCall(MatDestroy(&viewC));
140   }
141   PetscCall(MatDenseRestoreArray(C, &carray));
142   PetscCall(MatDenseRestoreArrayRead(B, &barray));
143 
144   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
145   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode MatNest_DenseDestroy(void *ctx) {
150   Nest_Dense *contents = (Nest_Dense *)ctx;
151   PetscInt    i;
152 
153   PetscFunctionBegin;
154   PetscCall(PetscFree(contents->tarray));
155   for (i = 0; i < contents->k; i++) PetscCall(MatDestroy(contents->workC + i));
156   PetscCall(PetscFree3(contents->dm, contents->dn, contents->workC));
157   PetscCall(PetscFree(contents));
158   PetscFunctionReturn(0);
159 }
160 
161 PETSC_INTERN PetscErrorCode MatProductSymbolic_Nest_Dense(Mat C) {
162   Mat_Nest          *bA;
163   Mat                viewB, workC;
164   const PetscScalar *barray;
165   PetscInt           i, j, M, N, m, n, nr, nc, maxm = 0, ldb;
166   Nest_Dense        *contents = NULL;
167   PetscBool          cisdense;
168   Mat                A, B;
169   PetscReal          fill;
170 
171   PetscFunctionBegin;
172   MatCheckProduct(C, 4);
173   PetscCheck(!C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
174   A    = C->product->A;
175   B    = C->product->B;
176   fill = C->product->fill;
177   bA   = (Mat_Nest *)A->data;
178   nr   = bA->nr;
179   nc   = bA->nc;
180   PetscCall(MatGetLocalSize(C, &m, &n));
181   PetscCall(MatGetSize(C, &M, &N));
182   if (m == PETSC_DECIDE || n == PETSC_DECIDE || M == PETSC_DECIDE || N == PETSC_DECIDE) {
183     PetscCall(MatGetLocalSize(B, NULL, &n));
184     PetscCall(MatGetSize(B, NULL, &N));
185     PetscCall(MatGetLocalSize(A, &m, NULL));
186     PetscCall(MatGetSize(A, &M, NULL));
187     PetscCall(MatSetSizes(C, m, n, M, N));
188   }
189   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, MATMPIDENSECUDA, ""));
190   if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)B)->type_name));
191   PetscCall(MatSetUp(C));
192   if (!N) {
193     C->ops->productnumeric = MatProductNumeric_Nest_Dense;
194     PetscFunctionReturn(0);
195   }
196 
197   PetscCall(PetscNew(&contents));
198   C->product->data    = contents;
199   C->product->destroy = MatNest_DenseDestroy;
200   PetscCall(PetscCalloc3(nr + 1, &contents->dm, nc + 1, &contents->dn, nr * nc, &contents->workC));
201   contents->k = nr * nc;
202   for (i = 0; i < nr; i++) {
203     PetscCall(ISGetLocalSize(bA->isglobal.row[i], contents->dm + i + 1));
204     maxm = PetscMax(maxm, contents->dm[i + 1]);
205     contents->dm[i + 1] += contents->dm[i];
206   }
207   for (i = 0; i < nc; i++) {
208     PetscCall(ISGetLocalSize(bA->isglobal.col[i], contents->dn + i + 1));
209     contents->dn[i + 1] += contents->dn[i];
210   }
211   PetscCall(PetscMalloc1(maxm * N, &contents->tarray));
212   PetscCall(MatDenseGetLDA(B, &ldb));
213   PetscCall(MatGetSize(B, NULL, &N));
214   PetscCall(MatDenseGetArrayRead(B, &barray));
215   /* loops are permuted compared to MatMatMultNumeric so that viewB is created only once per column of A */
216   for (j = 0; j < nc; j++) {
217     PetscCall(ISGetSize(bA->isglobal.col[j], &M));
218     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), contents->dn[j + 1] - contents->dn[j], PETSC_DECIDE, M, N, (PetscScalar *)(barray + contents->dn[j]), &viewB));
219     PetscCall(MatDenseSetLDA(viewB, ldb));
220     for (i = 0; i < nr; i++) {
221       if (!bA->m[i][j]) continue;
222       /* MatMatMultSymbolic may attach a specific container (depending on MatType of bA->m[i][j]) to workC[i][j] */
223 
224       PetscCall(MatProductCreate(bA->m[i][j], viewB, NULL, &contents->workC[i * nc + j]));
225       workC = contents->workC[i * nc + j];
226       PetscCall(MatProductSetType(workC, MATPRODUCT_AB));
227       PetscCall(MatProductSetAlgorithm(workC, "default"));
228       PetscCall(MatProductSetFill(workC, fill));
229       PetscCall(MatProductSetFromOptions(workC));
230       PetscCall(MatProductSymbolic(workC));
231 
232       /* since tarray will be shared by all Mat */
233       PetscCall(MatSeqDenseSetPreallocation(workC, contents->tarray));
234       PetscCall(MatMPIDenseSetPreallocation(workC, contents->tarray));
235     }
236     PetscCall(MatDestroy(&viewB));
237   }
238   PetscCall(MatDenseRestoreArrayRead(B, &barray));
239 
240   C->ops->productnumeric = MatProductNumeric_Nest_Dense;
241   PetscFunctionReturn(0);
242 }
243 
244 /* --------------------------------------------------------- */
245 static PetscErrorCode MatProductSetFromOptions_Nest_Dense_AB(Mat C) {
246   PetscFunctionBegin;
247   C->ops->productsymbolic = MatProductSymbolic_Nest_Dense;
248   PetscFunctionReturn(0);
249 }
250 
251 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_Nest_Dense(Mat C) {
252   Mat_Product *product = C->product;
253 
254   PetscFunctionBegin;
255   if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_Nest_Dense_AB(C));
256   PetscFunctionReturn(0);
257 }
258 /* --------------------------------------------------------- */
259 
260 static PetscErrorCode MatMultTranspose_Nest(Mat A, Vec x, Vec y) {
261   Mat_Nest *bA = (Mat_Nest *)A->data;
262   Vec      *bx = bA->left, *by = bA->right;
263   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
264 
265   PetscFunctionBegin;
266   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(x, bA->isglobal.row[i], &bx[i]));
267   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(y, bA->isglobal.col[i], &by[i]));
268   for (j = 0; j < nc; j++) {
269     PetscCall(VecZeroEntries(by[j]));
270     for (i = 0; i < nr; i++) {
271       if (!bA->m[i][j]) continue;
272       /* y[j] <- y[j] + (A[i][j])^T * x[i] */
273       PetscCall(MatMultTransposeAdd(bA->m[i][j], bx[i], by[j], by[j]));
274     }
275   }
276   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.row[i], &bx[i]));
277   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(y, bA->isglobal.col[i], &by[i]));
278   PetscFunctionReturn(0);
279 }
280 
281 static PetscErrorCode MatMultTransposeAdd_Nest(Mat A, Vec x, Vec y, Vec z) {
282   Mat_Nest *bA = (Mat_Nest *)A->data;
283   Vec      *bx = bA->left, *bz = bA->right;
284   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
285 
286   PetscFunctionBegin;
287   for (i = 0; i < nr; i++) PetscCall(VecGetSubVector(x, bA->isglobal.row[i], &bx[i]));
288   for (i = 0; i < nc; i++) PetscCall(VecGetSubVector(z, bA->isglobal.col[i], &bz[i]));
289   for (j = 0; j < nc; j++) {
290     if (y != z) {
291       Vec by;
292       PetscCall(VecGetSubVector(y, bA->isglobal.col[j], &by));
293       PetscCall(VecCopy(by, bz[j]));
294       PetscCall(VecRestoreSubVector(y, bA->isglobal.col[j], &by));
295     }
296     for (i = 0; i < nr; i++) {
297       if (!bA->m[i][j]) continue;
298       /* z[j] <- y[j] + (A[i][j])^T * x[i] */
299       PetscCall(MatMultTransposeAdd(bA->m[i][j], bx[i], bz[j], bz[j]));
300     }
301   }
302   for (i = 0; i < nr; i++) PetscCall(VecRestoreSubVector(x, bA->isglobal.row[i], &bx[i]));
303   for (i = 0; i < nc; i++) PetscCall(VecRestoreSubVector(z, bA->isglobal.col[i], &bz[i]));
304   PetscFunctionReturn(0);
305 }
306 
307 static PetscErrorCode MatTranspose_Nest(Mat A, MatReuse reuse, Mat *B) {
308   Mat_Nest *bA = (Mat_Nest *)A->data, *bC;
309   Mat       C;
310   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
311 
312   PetscFunctionBegin;
313   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
314   PetscCheck(reuse != MAT_INPLACE_MATRIX || nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_SIZ, "Square nested matrix only for in-place");
315 
316   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
317     Mat *subs;
318     IS  *is_row, *is_col;
319 
320     PetscCall(PetscCalloc1(nr * nc, &subs));
321     PetscCall(PetscMalloc2(nr, &is_row, nc, &is_col));
322     PetscCall(MatNestGetISs(A, is_row, is_col));
323     if (reuse == MAT_INPLACE_MATRIX) {
324       for (i = 0; i < nr; i++) {
325         for (j = 0; j < nc; j++) subs[i + nr * j] = bA->m[i][j];
326       }
327     }
328 
329     PetscCall(MatCreateNest(PetscObjectComm((PetscObject)A), nc, is_col, nr, is_row, subs, &C));
330     PetscCall(PetscFree(subs));
331     PetscCall(PetscFree2(is_row, is_col));
332   } else {
333     C = *B;
334   }
335 
336   bC = (Mat_Nest *)C->data;
337   for (i = 0; i < nr; i++) {
338     for (j = 0; j < nc; j++) {
339       if (bA->m[i][j]) {
340         PetscCall(MatTranspose(bA->m[i][j], reuse, &(bC->m[j][i])));
341       } else {
342         bC->m[j][i] = NULL;
343       }
344     }
345   }
346 
347   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
348     *B = C;
349   } else {
350     PetscCall(MatHeaderMerge(A, &C));
351   }
352   PetscFunctionReturn(0);
353 }
354 
355 static PetscErrorCode MatNestDestroyISList(PetscInt n, IS **list) {
356   IS      *lst = *list;
357   PetscInt i;
358 
359   PetscFunctionBegin;
360   if (!lst) PetscFunctionReturn(0);
361   for (i = 0; i < n; i++)
362     if (lst[i]) PetscCall(ISDestroy(&lst[i]));
363   PetscCall(PetscFree(lst));
364   *list = NULL;
365   PetscFunctionReturn(0);
366 }
367 
368 static PetscErrorCode MatReset_Nest(Mat A) {
369   Mat_Nest *vs = (Mat_Nest *)A->data;
370   PetscInt  i, j;
371 
372   PetscFunctionBegin;
373   /* release the matrices and the place holders */
374   PetscCall(MatNestDestroyISList(vs->nr, &vs->isglobal.row));
375   PetscCall(MatNestDestroyISList(vs->nc, &vs->isglobal.col));
376   PetscCall(MatNestDestroyISList(vs->nr, &vs->islocal.row));
377   PetscCall(MatNestDestroyISList(vs->nc, &vs->islocal.col));
378 
379   PetscCall(PetscFree(vs->row_len));
380   PetscCall(PetscFree(vs->col_len));
381   PetscCall(PetscFree(vs->nnzstate));
382 
383   PetscCall(PetscFree2(vs->left, vs->right));
384 
385   /* release the matrices and the place holders */
386   if (vs->m) {
387     for (i = 0; i < vs->nr; i++) {
388       for (j = 0; j < vs->nc; j++) PetscCall(MatDestroy(&vs->m[i][j]));
389       PetscCall(PetscFree(vs->m[i]));
390     }
391     PetscCall(PetscFree(vs->m));
392   }
393 
394   /* restore defaults */
395   vs->nr            = 0;
396   vs->nc            = 0;
397   vs->splitassembly = PETSC_FALSE;
398   PetscFunctionReturn(0);
399 }
400 
401 static PetscErrorCode MatDestroy_Nest(Mat A) {
402   PetscFunctionBegin;
403   PetscCall(MatReset_Nest(A));
404   PetscCall(PetscFree(A->data));
405   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMat_C", NULL));
406   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMat_C", NULL));
407   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMats_C", NULL));
408   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSize_C", NULL));
409   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetISs_C", NULL));
410   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetLocalISs_C", NULL));
411   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetVecType_C", NULL));
412   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMats_C", NULL));
413   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpiaij_C", NULL));
414   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqaij_C", NULL));
415   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_aij_C", NULL));
416   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_is_C", NULL));
417   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpidense_C", NULL));
418   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqdense_C", NULL));
419   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_seqdense_C", NULL));
420   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_mpidense_C", NULL));
421   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_dense_C", NULL));
422   PetscFunctionReturn(0);
423 }
424 
425 static PetscErrorCode MatMissingDiagonal_Nest(Mat mat, PetscBool *missing, PetscInt *dd) {
426   Mat_Nest *vs = (Mat_Nest *)mat->data;
427   PetscInt  i;
428 
429   PetscFunctionBegin;
430   if (dd) *dd = 0;
431   if (!vs->nr) {
432     *missing = PETSC_TRUE;
433     PetscFunctionReturn(0);
434   }
435   *missing = PETSC_FALSE;
436   for (i = 0; i < vs->nr && !(*missing); i++) {
437     *missing = PETSC_TRUE;
438     if (vs->m[i][i]) {
439       PetscCall(MatMissingDiagonal(vs->m[i][i], missing, NULL));
440       PetscCheck(!*missing || !dd, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "First missing entry not yet implemented");
441     }
442   }
443   PetscFunctionReturn(0);
444 }
445 
446 static PetscErrorCode MatAssemblyBegin_Nest(Mat A, MatAssemblyType type) {
447   Mat_Nest *vs = (Mat_Nest *)A->data;
448   PetscInt  i, j;
449   PetscBool nnzstate = PETSC_FALSE;
450 
451   PetscFunctionBegin;
452   for (i = 0; i < vs->nr; i++) {
453     for (j = 0; j < vs->nc; j++) {
454       PetscObjectState subnnzstate = 0;
455       if (vs->m[i][j]) {
456         PetscCall(MatAssemblyBegin(vs->m[i][j], type));
457         if (!vs->splitassembly) {
458           /* Note: split assembly will fail if the same block appears more than once (even indirectly through a nested
459            * sub-block). This could be fixed by adding a flag to Mat so that there was a way to check if a Mat was
460            * already performing an assembly, but the result would by more complicated and appears to offer less
461            * potential for diagnostics and correctness checking. Split assembly should be fixed once there is an
462            * interface for libraries to make asynchronous progress in "user-defined non-blocking collectives".
463            */
464           PetscCall(MatAssemblyEnd(vs->m[i][j], type));
465           PetscCall(MatGetNonzeroState(vs->m[i][j], &subnnzstate));
466         }
467       }
468       nnzstate                     = (PetscBool)(nnzstate || vs->nnzstate[i * vs->nc + j] != subnnzstate);
469       vs->nnzstate[i * vs->nc + j] = subnnzstate;
470     }
471   }
472   if (nnzstate) A->nonzerostate++;
473   PetscFunctionReturn(0);
474 }
475 
476 static PetscErrorCode MatAssemblyEnd_Nest(Mat A, MatAssemblyType type) {
477   Mat_Nest *vs = (Mat_Nest *)A->data;
478   PetscInt  i, j;
479 
480   PetscFunctionBegin;
481   for (i = 0; i < vs->nr; i++) {
482     for (j = 0; j < vs->nc; j++) {
483       if (vs->m[i][j]) {
484         if (vs->splitassembly) PetscCall(MatAssemblyEnd(vs->m[i][j], type));
485       }
486     }
487   }
488   PetscFunctionReturn(0);
489 }
490 
491 static PetscErrorCode MatNestFindNonzeroSubMatRow(Mat A, PetscInt row, Mat *B) {
492   Mat_Nest *vs = (Mat_Nest *)A->data;
493   PetscInt  j;
494   Mat       sub;
495 
496   PetscFunctionBegin;
497   sub = (row < vs->nc) ? vs->m[row][row] : (Mat)NULL; /* Prefer to find on the diagonal */
498   for (j = 0; !sub && j < vs->nc; j++) sub = vs->m[row][j];
499   if (sub) PetscCall(MatSetUp(sub)); /* Ensure that the sizes are available */
500   *B = sub;
501   PetscFunctionReturn(0);
502 }
503 
504 static PetscErrorCode MatNestFindNonzeroSubMatCol(Mat A, PetscInt col, Mat *B) {
505   Mat_Nest *vs = (Mat_Nest *)A->data;
506   PetscInt  i;
507   Mat       sub;
508 
509   PetscFunctionBegin;
510   sub = (col < vs->nr) ? vs->m[col][col] : (Mat)NULL; /* Prefer to find on the diagonal */
511   for (i = 0; !sub && i < vs->nr; i++) sub = vs->m[i][col];
512   if (sub) PetscCall(MatSetUp(sub)); /* Ensure that the sizes are available */
513   *B = sub;
514   PetscFunctionReturn(0);
515 }
516 
517 static PetscErrorCode MatNestFindISRange(Mat A, PetscInt n, const IS list[], IS is, PetscInt *begin, PetscInt *end) {
518   PetscInt  i, j, size, m;
519   PetscBool flg;
520   IS        out, concatenate[2];
521 
522   PetscFunctionBegin;
523   PetscValidPointer(list, 3);
524   PetscValidHeaderSpecific(is, IS_CLASSID, 4);
525   if (begin) {
526     PetscValidIntPointer(begin, 5);
527     *begin = -1;
528   }
529   if (end) {
530     PetscValidIntPointer(end, 6);
531     *end = -1;
532   }
533   for (i = 0; i < n; i++) {
534     if (!list[i]) continue;
535     PetscCall(ISEqualUnsorted(list[i], is, &flg));
536     if (flg) {
537       if (begin) *begin = i;
538       if (end) *end = i + 1;
539       PetscFunctionReturn(0);
540     }
541   }
542   PetscCall(ISGetSize(is, &size));
543   for (i = 0; i < n - 1; i++) {
544     if (!list[i]) continue;
545     m = 0;
546     PetscCall(ISConcatenate(PetscObjectComm((PetscObject)A), 2, list + i, &out));
547     PetscCall(ISGetSize(out, &m));
548     for (j = i + 2; j < n && m < size; j++) {
549       if (list[j]) {
550         concatenate[0] = out;
551         concatenate[1] = list[j];
552         PetscCall(ISConcatenate(PetscObjectComm((PetscObject)A), 2, concatenate, &out));
553         PetscCall(ISDestroy(concatenate));
554         PetscCall(ISGetSize(out, &m));
555       }
556     }
557     if (m == size) {
558       PetscCall(ISEqualUnsorted(out, is, &flg));
559       if (flg) {
560         if (begin) *begin = i;
561         if (end) *end = j;
562         PetscCall(ISDestroy(&out));
563         PetscFunctionReturn(0);
564       }
565     }
566     PetscCall(ISDestroy(&out));
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 static PetscErrorCode MatNestFillEmptyMat_Private(Mat A, PetscInt i, PetscInt j, Mat *B) {
572   Mat_Nest *vs = (Mat_Nest *)A->data;
573   PetscInt  lr, lc;
574 
575   PetscFunctionBegin;
576   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
577   PetscCall(ISGetLocalSize(vs->isglobal.row[i], &lr));
578   PetscCall(ISGetLocalSize(vs->isglobal.col[j], &lc));
579   PetscCall(MatSetSizes(*B, lr, lc, PETSC_DECIDE, PETSC_DECIDE));
580   PetscCall(MatSetType(*B, MATAIJ));
581   PetscCall(MatSeqAIJSetPreallocation(*B, 0, NULL));
582   PetscCall(MatMPIAIJSetPreallocation(*B, 0, NULL, 0, NULL));
583   PetscCall(MatSetUp(*B));
584   PetscCall(MatSetOption(*B, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
585   PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
586   PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
587   PetscFunctionReturn(0);
588 }
589 
590 static PetscErrorCode MatNestGetBlock_Private(Mat A, PetscInt rbegin, PetscInt rend, PetscInt cbegin, PetscInt cend, Mat *B) {
591   Mat_Nest  *vs = (Mat_Nest *)A->data;
592   Mat       *a;
593   PetscInt   i, j, k, l, nr = rend - rbegin, nc = cend - cbegin;
594   char       keyname[256];
595   PetscBool *b;
596   PetscBool  flg;
597 
598   PetscFunctionBegin;
599   *B = NULL;
600   PetscCall(PetscSNPrintf(keyname, sizeof(keyname), "NestBlock_%" PetscInt_FMT "-%" PetscInt_FMT "x%" PetscInt_FMT "-%" PetscInt_FMT, rbegin, rend, cbegin, cend));
601   PetscCall(PetscObjectQuery((PetscObject)A, keyname, (PetscObject *)B));
602   if (*B) PetscFunctionReturn(0);
603 
604   PetscCall(PetscMalloc2(nr * nc, &a, nr * nc, &b));
605   for (i = 0; i < nr; i++) {
606     for (j = 0; j < nc; j++) {
607       a[i * nc + j] = vs->m[rbegin + i][cbegin + j];
608       b[i * nc + j] = PETSC_FALSE;
609     }
610   }
611   if (nc != vs->nc && nr != vs->nr) {
612     for (i = 0; i < nr; i++) {
613       for (j = 0; j < nc; j++) {
614         flg = PETSC_FALSE;
615         for (k = 0; (k < nr && !flg); k++) {
616           if (a[j + k * nc]) flg = PETSC_TRUE;
617         }
618         if (flg) {
619           flg = PETSC_FALSE;
620           for (l = 0; (l < nc && !flg); l++) {
621             if (a[i * nc + l]) flg = PETSC_TRUE;
622           }
623         }
624         if (!flg) {
625           b[i * nc + j] = PETSC_TRUE;
626           PetscCall(MatNestFillEmptyMat_Private(A, rbegin + i, cbegin + j, a + i * nc + j));
627         }
628       }
629     }
630   }
631   PetscCall(MatCreateNest(PetscObjectComm((PetscObject)A), nr, nr != vs->nr ? NULL : vs->isglobal.row, nc, nc != vs->nc ? NULL : vs->isglobal.col, a, B));
632   for (i = 0; i < nr; i++) {
633     for (j = 0; j < nc; j++) {
634       if (b[i * nc + j]) PetscCall(MatDestroy(a + i * nc + j));
635     }
636   }
637   PetscCall(PetscFree2(a, b));
638   (*B)->assembled = A->assembled;
639   PetscCall(PetscObjectCompose((PetscObject)A, keyname, (PetscObject)*B));
640   PetscCall(PetscObjectDereference((PetscObject)*B)); /* Leave the only remaining reference in the composition */
641   PetscFunctionReturn(0);
642 }
643 
644 static PetscErrorCode MatNestFindSubMat(Mat A, struct MatNestISPair *is, IS isrow, IS iscol, Mat *B) {
645   Mat_Nest *vs = (Mat_Nest *)A->data;
646   PetscInt  rbegin, rend, cbegin, cend;
647 
648   PetscFunctionBegin;
649   PetscCall(MatNestFindISRange(A, vs->nr, is->row, isrow, &rbegin, &rend));
650   PetscCall(MatNestFindISRange(A, vs->nc, is->col, iscol, &cbegin, &cend));
651   if (rend == rbegin + 1 && cend == cbegin + 1) {
652     if (!vs->m[rbegin][cbegin]) PetscCall(MatNestFillEmptyMat_Private(A, rbegin, cbegin, vs->m[rbegin] + cbegin));
653     *B = vs->m[rbegin][cbegin];
654   } else if (rbegin != -1 && cbegin != -1) {
655     PetscCall(MatNestGetBlock_Private(A, rbegin, rend, cbegin, cend, B));
656   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Could not find index set");
657   PetscFunctionReturn(0);
658 }
659 
660 /*
661    TODO: This does not actually returns a submatrix we can modify
662 */
663 static PetscErrorCode MatCreateSubMatrix_Nest(Mat A, IS isrow, IS iscol, MatReuse reuse, Mat *B) {
664   Mat_Nest *vs = (Mat_Nest *)A->data;
665   Mat       sub;
666 
667   PetscFunctionBegin;
668   PetscCall(MatNestFindSubMat(A, &vs->isglobal, isrow, iscol, &sub));
669   switch (reuse) {
670   case MAT_INITIAL_MATRIX:
671     if (sub) PetscCall(PetscObjectReference((PetscObject)sub));
672     *B = sub;
673     break;
674   case MAT_REUSE_MATRIX: PetscCheck(sub == *B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Submatrix was not used before in this call"); break;
675   case MAT_IGNORE_MATRIX: /* Nothing to do */ break;
676   case MAT_INPLACE_MATRIX: /* Nothing to do */ SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_INPLACE_MATRIX is not supported yet");
677   }
678   PetscFunctionReturn(0);
679 }
680 
681 PetscErrorCode MatGetLocalSubMatrix_Nest(Mat A, IS isrow, IS iscol, Mat *B) {
682   Mat_Nest *vs = (Mat_Nest *)A->data;
683   Mat       sub;
684 
685   PetscFunctionBegin;
686   PetscCall(MatNestFindSubMat(A, &vs->islocal, isrow, iscol, &sub));
687   /* We allow the submatrix to be NULL, perhaps it would be better for the user to return an empty matrix instead */
688   if (sub) PetscCall(PetscObjectReference((PetscObject)sub));
689   *B = sub;
690   PetscFunctionReturn(0);
691 }
692 
693 static PetscErrorCode MatRestoreLocalSubMatrix_Nest(Mat A, IS isrow, IS iscol, Mat *B) {
694   Mat_Nest *vs = (Mat_Nest *)A->data;
695   Mat       sub;
696 
697   PetscFunctionBegin;
698   PetscCall(MatNestFindSubMat(A, &vs->islocal, isrow, iscol, &sub));
699   PetscCheck(*B == sub, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Local submatrix has not been gotten");
700   if (sub) {
701     PetscCheck(((PetscObject)sub)->refct > 1, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Local submatrix has had reference count decremented too many times");
702     PetscCall(MatDestroy(B));
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 static PetscErrorCode MatGetDiagonal_Nest(Mat A, Vec v) {
708   Mat_Nest *bA = (Mat_Nest *)A->data;
709   PetscInt  i;
710 
711   PetscFunctionBegin;
712   for (i = 0; i < bA->nr; i++) {
713     Vec bv;
714     PetscCall(VecGetSubVector(v, bA->isglobal.row[i], &bv));
715     if (bA->m[i][i]) {
716       PetscCall(MatGetDiagonal(bA->m[i][i], bv));
717     } else {
718       PetscCall(VecSet(bv, 0.0));
719     }
720     PetscCall(VecRestoreSubVector(v, bA->isglobal.row[i], &bv));
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 static PetscErrorCode MatDiagonalScale_Nest(Mat A, Vec l, Vec r) {
726   Mat_Nest *bA = (Mat_Nest *)A->data;
727   Vec       bl, *br;
728   PetscInt  i, j;
729 
730   PetscFunctionBegin;
731   PetscCall(PetscCalloc1(bA->nc, &br));
732   if (r) {
733     for (j = 0; j < bA->nc; j++) PetscCall(VecGetSubVector(r, bA->isglobal.col[j], &br[j]));
734   }
735   bl = NULL;
736   for (i = 0; i < bA->nr; i++) {
737     if (l) PetscCall(VecGetSubVector(l, bA->isglobal.row[i], &bl));
738     for (j = 0; j < bA->nc; j++) {
739       if (bA->m[i][j]) PetscCall(MatDiagonalScale(bA->m[i][j], bl, br[j]));
740     }
741     if (l) PetscCall(VecRestoreSubVector(l, bA->isglobal.row[i], &bl));
742   }
743   if (r) {
744     for (j = 0; j < bA->nc; j++) PetscCall(VecRestoreSubVector(r, bA->isglobal.col[j], &br[j]));
745   }
746   PetscCall(PetscFree(br));
747   PetscFunctionReturn(0);
748 }
749 
750 static PetscErrorCode MatScale_Nest(Mat A, PetscScalar a) {
751   Mat_Nest *bA = (Mat_Nest *)A->data;
752   PetscInt  i, j;
753 
754   PetscFunctionBegin;
755   for (i = 0; i < bA->nr; i++) {
756     for (j = 0; j < bA->nc; j++) {
757       if (bA->m[i][j]) PetscCall(MatScale(bA->m[i][j], a));
758     }
759   }
760   PetscFunctionReturn(0);
761 }
762 
763 static PetscErrorCode MatShift_Nest(Mat A, PetscScalar a) {
764   Mat_Nest *bA = (Mat_Nest *)A->data;
765   PetscInt  i;
766   PetscBool nnzstate = PETSC_FALSE;
767 
768   PetscFunctionBegin;
769   for (i = 0; i < bA->nr; i++) {
770     PetscObjectState subnnzstate = 0;
771     PetscCheck(bA->m[i][i], PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for shifting an empty diagonal block, insert a matrix in block (%" PetscInt_FMT ",%" PetscInt_FMT ")", i, i);
772     PetscCall(MatShift(bA->m[i][i], a));
773     PetscCall(MatGetNonzeroState(bA->m[i][i], &subnnzstate));
774     nnzstate                     = (PetscBool)(nnzstate || bA->nnzstate[i * bA->nc + i] != subnnzstate);
775     bA->nnzstate[i * bA->nc + i] = subnnzstate;
776   }
777   if (nnzstate) A->nonzerostate++;
778   PetscFunctionReturn(0);
779 }
780 
781 static PetscErrorCode MatDiagonalSet_Nest(Mat A, Vec D, InsertMode is) {
782   Mat_Nest *bA = (Mat_Nest *)A->data;
783   PetscInt  i;
784   PetscBool nnzstate = PETSC_FALSE;
785 
786   PetscFunctionBegin;
787   for (i = 0; i < bA->nr; i++) {
788     PetscObjectState subnnzstate = 0;
789     Vec              bv;
790     PetscCall(VecGetSubVector(D, bA->isglobal.row[i], &bv));
791     if (bA->m[i][i]) {
792       PetscCall(MatDiagonalSet(bA->m[i][i], bv, is));
793       PetscCall(MatGetNonzeroState(bA->m[i][i], &subnnzstate));
794     }
795     PetscCall(VecRestoreSubVector(D, bA->isglobal.row[i], &bv));
796     nnzstate                     = (PetscBool)(nnzstate || bA->nnzstate[i * bA->nc + i] != subnnzstate);
797     bA->nnzstate[i * bA->nc + i] = subnnzstate;
798   }
799   if (nnzstate) A->nonzerostate++;
800   PetscFunctionReturn(0);
801 }
802 
803 static PetscErrorCode MatSetRandom_Nest(Mat A, PetscRandom rctx) {
804   Mat_Nest *bA = (Mat_Nest *)A->data;
805   PetscInt  i, j;
806 
807   PetscFunctionBegin;
808   for (i = 0; i < bA->nr; i++) {
809     for (j = 0; j < bA->nc; j++) {
810       if (bA->m[i][j]) PetscCall(MatSetRandom(bA->m[i][j], rctx));
811     }
812   }
813   PetscFunctionReturn(0);
814 }
815 
816 static PetscErrorCode MatCreateVecs_Nest(Mat A, Vec *right, Vec *left) {
817   Mat_Nest *bA = (Mat_Nest *)A->data;
818   Vec      *L, *R;
819   MPI_Comm  comm;
820   PetscInt  i, j;
821 
822   PetscFunctionBegin;
823   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
824   if (right) {
825     /* allocate R */
826     PetscCall(PetscMalloc1(bA->nc, &R));
827     /* Create the right vectors */
828     for (j = 0; j < bA->nc; j++) {
829       for (i = 0; i < bA->nr; i++) {
830         if (bA->m[i][j]) {
831           PetscCall(MatCreateVecs(bA->m[i][j], &R[j], NULL));
832           break;
833         }
834       }
835       PetscCheck(i != bA->nr, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Mat(Nest) contains a null column.");
836     }
837     PetscCall(VecCreateNest(comm, bA->nc, bA->isglobal.col, R, right));
838     /* hand back control to the nest vector */
839     for (j = 0; j < bA->nc; j++) PetscCall(VecDestroy(&R[j]));
840     PetscCall(PetscFree(R));
841   }
842 
843   if (left) {
844     /* allocate L */
845     PetscCall(PetscMalloc1(bA->nr, &L));
846     /* Create the left vectors */
847     for (i = 0; i < bA->nr; i++) {
848       for (j = 0; j < bA->nc; j++) {
849         if (bA->m[i][j]) {
850           PetscCall(MatCreateVecs(bA->m[i][j], NULL, &L[i]));
851           break;
852         }
853       }
854       PetscCheck(j != bA->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Mat(Nest) contains a null row.");
855     }
856 
857     PetscCall(VecCreateNest(comm, bA->nr, bA->isglobal.row, L, left));
858     for (i = 0; i < bA->nr; i++) PetscCall(VecDestroy(&L[i]));
859 
860     PetscCall(PetscFree(L));
861   }
862   PetscFunctionReturn(0);
863 }
864 
865 static PetscErrorCode MatView_Nest(Mat A, PetscViewer viewer) {
866   Mat_Nest *bA = (Mat_Nest *)A->data;
867   PetscBool isascii, viewSub = PETSC_FALSE;
868   PetscInt  i, j;
869 
870   PetscFunctionBegin;
871   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
872   if (isascii) {
873     PetscCall(PetscOptionsGetBool(((PetscObject)A)->options, ((PetscObject)A)->prefix, "-mat_view_nest_sub", &viewSub, NULL));
874     PetscCall(PetscViewerASCIIPrintf(viewer, "Matrix object: \n"));
875     PetscCall(PetscViewerASCIIPushTab(viewer));
876     PetscCall(PetscViewerASCIIPrintf(viewer, "type=nest, rows=%" PetscInt_FMT ", cols=%" PetscInt_FMT " \n", bA->nr, bA->nc));
877 
878     PetscCall(PetscViewerASCIIPrintf(viewer, "MatNest structure: \n"));
879     for (i = 0; i < bA->nr; i++) {
880       for (j = 0; j < bA->nc; j++) {
881         MatType   type;
882         char      name[256] = "", prefix[256] = "";
883         PetscInt  NR, NC;
884         PetscBool isNest = PETSC_FALSE;
885 
886         if (!bA->m[i][j]) {
887           PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ",%" PetscInt_FMT ") : NULL \n", i, j));
888           continue;
889         }
890         PetscCall(MatGetSize(bA->m[i][j], &NR, &NC));
891         PetscCall(MatGetType(bA->m[i][j], &type));
892         if (((PetscObject)bA->m[i][j])->name) PetscCall(PetscSNPrintf(name, sizeof(name), "name=\"%s\", ", ((PetscObject)bA->m[i][j])->name));
893         if (((PetscObject)bA->m[i][j])->prefix) PetscCall(PetscSNPrintf(prefix, sizeof(prefix), "prefix=\"%s\", ", ((PetscObject)bA->m[i][j])->prefix));
894         PetscCall(PetscObjectTypeCompare((PetscObject)bA->m[i][j], MATNEST, &isNest));
895 
896         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ",%" PetscInt_FMT ") : %s%stype=%s, rows=%" PetscInt_FMT ", cols=%" PetscInt_FMT " \n", i, j, name, prefix, type, NR, NC));
897 
898         if (isNest || viewSub) {
899           PetscCall(PetscViewerASCIIPushTab(viewer)); /* push1 */
900           PetscCall(MatView(bA->m[i][j], viewer));
901           PetscCall(PetscViewerASCIIPopTab(viewer)); /* pop1 */
902         }
903       }
904     }
905     PetscCall(PetscViewerASCIIPopTab(viewer)); /* pop0 */
906   }
907   PetscFunctionReturn(0);
908 }
909 
910 static PetscErrorCode MatZeroEntries_Nest(Mat A) {
911   Mat_Nest *bA = (Mat_Nest *)A->data;
912   PetscInt  i, j;
913 
914   PetscFunctionBegin;
915   for (i = 0; i < bA->nr; i++) {
916     for (j = 0; j < bA->nc; j++) {
917       if (!bA->m[i][j]) continue;
918       PetscCall(MatZeroEntries(bA->m[i][j]));
919     }
920   }
921   PetscFunctionReturn(0);
922 }
923 
924 static PetscErrorCode MatCopy_Nest(Mat A, Mat B, MatStructure str) {
925   Mat_Nest *bA = (Mat_Nest *)A->data, *bB = (Mat_Nest *)B->data;
926   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
927   PetscBool nnzstate = PETSC_FALSE;
928 
929   PetscFunctionBegin;
930   PetscCheck(nr == bB->nr && nc == bB->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Cannot copy a Mat_Nest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ") to a Mat_Nest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ")", bB->nr, bB->nc, nr, nc);
931   for (i = 0; i < nr; i++) {
932     for (j = 0; j < nc; j++) {
933       PetscObjectState subnnzstate = 0;
934       if (bA->m[i][j] && bB->m[i][j]) {
935         PetscCall(MatCopy(bA->m[i][j], bB->m[i][j], str));
936       } else PetscCheck(!bA->m[i][j] && !bB->m[i][j], PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Matrix block does not exist at %" PetscInt_FMT ",%" PetscInt_FMT, i, j);
937       PetscCall(MatGetNonzeroState(bB->m[i][j], &subnnzstate));
938       nnzstate                 = (PetscBool)(nnzstate || bB->nnzstate[i * nc + j] != subnnzstate);
939       bB->nnzstate[i * nc + j] = subnnzstate;
940     }
941   }
942   if (nnzstate) B->nonzerostate++;
943   PetscFunctionReturn(0);
944 }
945 
946 static PetscErrorCode MatAXPY_Nest(Mat Y, PetscScalar a, Mat X, MatStructure str) {
947   Mat_Nest *bY = (Mat_Nest *)Y->data, *bX = (Mat_Nest *)X->data;
948   PetscInt  i, j, nr = bY->nr, nc = bY->nc;
949   PetscBool nnzstate = PETSC_FALSE;
950 
951   PetscFunctionBegin;
952   PetscCheck(nr == bX->nr && nc == bX->nc, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_INCOMP, "Cannot AXPY a MatNest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ") with a MatNest of block size (%" PetscInt_FMT ",%" PetscInt_FMT ")", bX->nr, bX->nc, nr, nc);
953   for (i = 0; i < nr; i++) {
954     for (j = 0; j < nc; j++) {
955       PetscObjectState subnnzstate = 0;
956       if (bY->m[i][j] && bX->m[i][j]) {
957         PetscCall(MatAXPY(bY->m[i][j], a, bX->m[i][j], str));
958       } else if (bX->m[i][j]) {
959         Mat M;
960 
961         PetscCheck(str == DIFFERENT_NONZERO_PATTERN, PetscObjectComm((PetscObject)Y), PETSC_ERR_ARG_INCOMP, "Matrix block does not exist at %" PetscInt_FMT ",%" PetscInt_FMT ". Use DIFFERENT_NONZERO_PATTERN", i, j);
962         PetscCall(MatDuplicate(bX->m[i][j], MAT_COPY_VALUES, &M));
963         PetscCall(MatNestSetSubMat(Y, i, j, M));
964         PetscCall(MatDestroy(&M));
965       }
966       if (bY->m[i][j]) PetscCall(MatGetNonzeroState(bY->m[i][j], &subnnzstate));
967       nnzstate                 = (PetscBool)(nnzstate || bY->nnzstate[i * nc + j] != subnnzstate);
968       bY->nnzstate[i * nc + j] = subnnzstate;
969     }
970   }
971   if (nnzstate) Y->nonzerostate++;
972   PetscFunctionReturn(0);
973 }
974 
975 static PetscErrorCode MatDuplicate_Nest(Mat A, MatDuplicateOption op, Mat *B) {
976   Mat_Nest *bA = (Mat_Nest *)A->data;
977   Mat      *b;
978   PetscInt  i, j, nr = bA->nr, nc = bA->nc;
979 
980   PetscFunctionBegin;
981   PetscCall(PetscMalloc1(nr * nc, &b));
982   for (i = 0; i < nr; i++) {
983     for (j = 0; j < nc; j++) {
984       if (bA->m[i][j]) {
985         PetscCall(MatDuplicate(bA->m[i][j], op, &b[i * nc + j]));
986       } else {
987         b[i * nc + j] = NULL;
988       }
989     }
990   }
991   PetscCall(MatCreateNest(PetscObjectComm((PetscObject)A), nr, bA->isglobal.row, nc, bA->isglobal.col, b, B));
992   /* Give the new MatNest exclusive ownership */
993   for (i = 0; i < nr * nc; i++) PetscCall(MatDestroy(&b[i]));
994   PetscCall(PetscFree(b));
995 
996   PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
997   PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
998   PetscFunctionReturn(0);
999 }
1000 
1001 /* nest api */
1002 PetscErrorCode MatNestGetSubMat_Nest(Mat A, PetscInt idxm, PetscInt jdxm, Mat *mat) {
1003   Mat_Nest *bA = (Mat_Nest *)A->data;
1004 
1005   PetscFunctionBegin;
1006   PetscCheck(idxm < bA->nr, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm, bA->nr - 1);
1007   PetscCheck(jdxm < bA->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT, jdxm, bA->nc - 1);
1008   *mat = bA->m[idxm][jdxm];
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 /*@
1013  MatNestGetSubMat - Returns a single, sub-matrix from a `MATNEST`
1014 
1015  Not collective
1016 
1017  Input Parameters:
1018 +   A  - `MATNEST` matrix
1019 .   idxm - index of the matrix within the nest matrix
1020 -   jdxm - index of the matrix within the nest matrix
1021 
1022  Output Parameter:
1023 .   sub - matrix at index idxm,jdxm within the nest matrix
1024 
1025  Level: developer
1026 
1027 .seealso: `MATNEST`, `MatNestGetSize()`, `MatNestGetSubMats()`, `MatCreateNest()`, `MATNEST`, `MatNestSetSubMat()`,
1028           `MatNestGetLocalISs()`, `MatNestGetISs()`
1029 @*/
1030 PetscErrorCode MatNestGetSubMat(Mat A, PetscInt idxm, PetscInt jdxm, Mat *sub) {
1031   PetscFunctionBegin;
1032   PetscUseMethod(A, "MatNestGetSubMat_C", (Mat, PetscInt, PetscInt, Mat *), (A, idxm, jdxm, sub));
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 PetscErrorCode MatNestSetSubMat_Nest(Mat A, PetscInt idxm, PetscInt jdxm, Mat mat) {
1037   Mat_Nest *bA = (Mat_Nest *)A->data;
1038   PetscInt  m, n, M, N, mi, ni, Mi, Ni;
1039 
1040   PetscFunctionBegin;
1041   PetscCheck(idxm < bA->nr, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm, bA->nr - 1);
1042   PetscCheck(jdxm < bA->nc, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Col too large: row %" PetscInt_FMT " max %" PetscInt_FMT, jdxm, bA->nc - 1);
1043   PetscCall(MatGetLocalSize(mat, &m, &n));
1044   PetscCall(MatGetSize(mat, &M, &N));
1045   PetscCall(ISGetLocalSize(bA->isglobal.row[idxm], &mi));
1046   PetscCall(ISGetSize(bA->isglobal.row[idxm], &Mi));
1047   PetscCall(ISGetLocalSize(bA->isglobal.col[jdxm], &ni));
1048   PetscCall(ISGetSize(bA->isglobal.col[jdxm], &Ni));
1049   PetscCheck(M == Mi && N == Ni, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "Submatrix dimension (%" PetscInt_FMT ",%" PetscInt_FMT ") incompatible with nest block (%" PetscInt_FMT ",%" PetscInt_FMT ")", M, N, Mi, Ni);
1050   PetscCheck(m == mi && n == ni, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "Submatrix local dimension (%" PetscInt_FMT ",%" PetscInt_FMT ") incompatible with nest block (%" PetscInt_FMT ",%" PetscInt_FMT ")", m, n, mi, ni);
1051 
1052   /* do not increase object state */
1053   if (mat == bA->m[idxm][jdxm]) PetscFunctionReturn(0);
1054 
1055   PetscCall(PetscObjectReference((PetscObject)mat));
1056   PetscCall(MatDestroy(&bA->m[idxm][jdxm]));
1057   bA->m[idxm][jdxm] = mat;
1058   PetscCall(PetscObjectStateIncrease((PetscObject)A));
1059   PetscCall(MatGetNonzeroState(mat, &bA->nnzstate[idxm * bA->nc + jdxm]));
1060   A->nonzerostate++;
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 /*@
1065  MatNestSetSubMat - Set a single submatrix in the `MATNEST`
1066 
1067  Logically collective on the submatrix communicator
1068 
1069  Input Parameters:
1070 +   A  - `MATNEST` matrix
1071 .   idxm - index of the matrix within the nest matrix
1072 .   jdxm - index of the matrix within the nest matrix
1073 -   sub - matrix at index idxm,jdxm within the nest matrix
1074 
1075  Notes:
1076  The new submatrix must have the same size and communicator as that block of the nest.
1077 
1078  This increments the reference count of the submatrix.
1079 
1080  Level: developer
1081 
1082 .seealso: `MATNEST`, `MatNestSetSubMats()`, `MatNestGetSubMats()`, `MatNestGetLocalISs()`, `MATNEST`, `MatCreateNest()`,
1083           `MatNestGetSubMat()`, `MatNestGetISs()`, `MatNestGetSize()`
1084 @*/
1085 PetscErrorCode MatNestSetSubMat(Mat A, PetscInt idxm, PetscInt jdxm, Mat sub) {
1086   PetscFunctionBegin;
1087   PetscUseMethod(A, "MatNestSetSubMat_C", (Mat, PetscInt, PetscInt, Mat), (A, idxm, jdxm, sub));
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 PetscErrorCode MatNestGetSubMats_Nest(Mat A, PetscInt *M, PetscInt *N, Mat ***mat) {
1092   Mat_Nest *bA = (Mat_Nest *)A->data;
1093 
1094   PetscFunctionBegin;
1095   if (M) *M = bA->nr;
1096   if (N) *N = bA->nc;
1097   if (mat) *mat = bA->m;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 /*@C
1102  MatNestGetSubMats - Returns the entire two dimensional array of matrices defining a `MATNEST` matrix.
1103 
1104  Not collective
1105 
1106  Input Parameter:
1107 .   A  - nest matrix
1108 
1109  Output Parameters:
1110 +   M - number of rows in the nest matrix
1111 .   N - number of cols in the nest matrix
1112 -   mat - 2d array of matrices
1113 
1114  Note:
1115  The user should not free the array mat.
1116 
1117  Fortran Note:
1118  This routine has a calling sequence
1119 $   call MatNestGetSubMats(A, M, N, mat, ierr)
1120  where the space allocated for the optional argument mat is assumed large enough (if provided).
1121 
1122  Level: developer
1123 
1124 .seealso: `MATNEST`, `MatNestGetSize()`, `MatNestGetSubMat()`, `MatNestGetLocalISs()`, `MATNEST`, `MatCreateNest()`,
1125           `MatNestSetSubMats()`, `MatNestGetISs()`, `MatNestSetSubMat()`
1126 @*/
1127 PetscErrorCode MatNestGetSubMats(Mat A, PetscInt *M, PetscInt *N, Mat ***mat) {
1128   PetscFunctionBegin;
1129   PetscUseMethod(A, "MatNestGetSubMats_C", (Mat, PetscInt *, PetscInt *, Mat ***), (A, M, N, mat));
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 PetscErrorCode MatNestGetSize_Nest(Mat A, PetscInt *M, PetscInt *N) {
1134   Mat_Nest *bA = (Mat_Nest *)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (M) *M = bA->nr;
1138   if (N) *N = bA->nc;
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 /*@
1143  MatNestGetSize - Returns the size of the `MATNEST` matrix.
1144 
1145  Not collective
1146 
1147  Input Parameter:
1148 .   A  - `MATNEST` matrix
1149 
1150  Output Parameters:
1151 +   M - number of rows in the nested mat
1152 -   N - number of cols in the nested mat
1153 
1154  Level: developer
1155 
1156 .seealso: `MATNEST`, `MatNestGetSubMat()`, `MatNestGetSubMats()`, `MATNEST`, `MatCreateNest()`, `MatNestGetLocalISs()`,
1157           `MatNestGetISs()`
1158 @*/
1159 PetscErrorCode MatNestGetSize(Mat A, PetscInt *M, PetscInt *N) {
1160   PetscFunctionBegin;
1161   PetscUseMethod(A, "MatNestGetSize_C", (Mat, PetscInt *, PetscInt *), (A, M, N));
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 static PetscErrorCode MatNestGetISs_Nest(Mat A, IS rows[], IS cols[]) {
1166   Mat_Nest *vs = (Mat_Nest *)A->data;
1167   PetscInt  i;
1168 
1169   PetscFunctionBegin;
1170   if (rows)
1171     for (i = 0; i < vs->nr; i++) rows[i] = vs->isglobal.row[i];
1172   if (cols)
1173     for (i = 0; i < vs->nc; i++) cols[i] = vs->isglobal.col[i];
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 /*@C
1178  MatNestGetISs - Returns the index sets partitioning the row and column spaces of a `MATNEST`
1179 
1180  Not collective
1181 
1182  Input Parameter:
1183 .   A  - `MATNEST` matrix
1184 
1185  Output Parameters:
1186 +   rows - array of row index sets
1187 -   cols - array of column index sets
1188 
1189  Level: advanced
1190 
1191  Note:
1192  The user must have allocated arrays of the correct size. The reference count is not increased on the returned ISs.
1193 
1194 .seealso: `MATNEST`, `MatNestGetSubMat()`, `MatNestGetSubMats()`, `MatNestGetSize()`, `MatNestGetLocalISs()`, `MATNEST`,
1195           `MatCreateNest()`, `MatNestGetSubMats()`, `MatNestSetSubMats()`
1196 @*/
1197 PetscErrorCode MatNestGetISs(Mat A, IS rows[], IS cols[]) {
1198   PetscFunctionBegin;
1199   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1200   PetscUseMethod(A, "MatNestGetISs_C", (Mat, IS[], IS[]), (A, rows, cols));
1201   PetscFunctionReturn(0);
1202 }
1203 
1204 static PetscErrorCode MatNestGetLocalISs_Nest(Mat A, IS rows[], IS cols[]) {
1205   Mat_Nest *vs = (Mat_Nest *)A->data;
1206   PetscInt  i;
1207 
1208   PetscFunctionBegin;
1209   if (rows)
1210     for (i = 0; i < vs->nr; i++) rows[i] = vs->islocal.row[i];
1211   if (cols)
1212     for (i = 0; i < vs->nc; i++) cols[i] = vs->islocal.col[i];
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*@C
1217  MatNestGetLocalISs - Returns the index sets partitioning the row and column spaces of a `MATNEST`
1218 
1219  Not collective
1220 
1221  Input Parameter:
1222 .   A  - `MATNEST` matrix
1223 
1224  Output Parameters:
1225 +   rows - array of row index sets (or NULL to ignore)
1226 -   cols - array of column index sets (or NULL to ignore)
1227 
1228  Level: advanced
1229 
1230  Note:
1231  The user must have allocated arrays of the correct size. The reference count is not increased on the returned ISs.
1232 
1233 .seealso: `MATNEST`, `MatNestGetSubMat()`, `MatNestGetSubMats()`, `MatNestGetSize()`, `MatNestGetISs()`, `MatCreateNest()`,
1234           `MATNEST`, `MatNestSetSubMats()`, `MatNestSetSubMat()`
1235 @*/
1236 PetscErrorCode MatNestGetLocalISs(Mat A, IS rows[], IS cols[]) {
1237   PetscFunctionBegin;
1238   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1239   PetscUseMethod(A, "MatNestGetLocalISs_C", (Mat, IS[], IS[]), (A, rows, cols));
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 PetscErrorCode MatNestSetVecType_Nest(Mat A, VecType vtype) {
1244   PetscBool flg;
1245 
1246   PetscFunctionBegin;
1247   PetscCall(PetscStrcmp(vtype, VECNEST, &flg));
1248   /* In reality, this only distinguishes VECNEST and "other" */
1249   if (flg) A->ops->getvecs = MatCreateVecs_Nest;
1250   else A->ops->getvecs = (PetscErrorCode(*)(Mat, Vec *, Vec *))0;
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 /*@C
1255  MatNestSetVecType - Sets the type of `Vec` returned by `MatCreateVecs()`
1256 
1257  Not collective
1258 
1259  Input Parameters:
1260 +  A  - `MATNEST` matrix
1261 -  vtype - `VecType` to use for creating vectors
1262 
1263  Level: developer
1264 
1265 .seealso: `MATNEST`, `MatCreateVecs()`, `MATNEST`, `MatCreateNest()`, `VecType`
1266 @*/
1267 PetscErrorCode MatNestSetVecType(Mat A, VecType vtype) {
1268   PetscFunctionBegin;
1269   PetscTryMethod(A, "MatNestSetVecType_C", (Mat, VecType), (A, vtype));
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 PetscErrorCode MatNestSetSubMats_Nest(Mat A, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[], const Mat a[]) {
1274   Mat_Nest *s = (Mat_Nest *)A->data;
1275   PetscInt  i, j, m, n, M, N;
1276   PetscBool cong, isstd, sametype = PETSC_FALSE;
1277   VecType   vtype, type;
1278 
1279   PetscFunctionBegin;
1280   PetscCall(MatReset_Nest(A));
1281 
1282   s->nr = nr;
1283   s->nc = nc;
1284 
1285   /* Create space for submatrices */
1286   PetscCall(PetscMalloc1(nr, &s->m));
1287   for (i = 0; i < nr; i++) PetscCall(PetscMalloc1(nc, &s->m[i]));
1288   for (i = 0; i < nr; i++) {
1289     for (j = 0; j < nc; j++) {
1290       s->m[i][j] = a[i * nc + j];
1291       if (a[i * nc + j]) PetscCall(PetscObjectReference((PetscObject)a[i * nc + j]));
1292     }
1293   }
1294   PetscCall(MatGetVecType(A, &vtype));
1295   PetscCall(PetscStrcmp(vtype, VECSTANDARD, &isstd));
1296   if (isstd) {
1297     /* check if all blocks have the same vectype */
1298     vtype = NULL;
1299     for (i = 0; i < nr; i++) {
1300       for (j = 0; j < nc; j++) {
1301         if (a[i * nc + j]) {
1302           if (!vtype) { /* first visited block */
1303             PetscCall(MatGetVecType(a[i * nc + j], &vtype));
1304             sametype = PETSC_TRUE;
1305           } else if (sametype) {
1306             PetscCall(MatGetVecType(a[i * nc + j], &type));
1307             PetscCall(PetscStrcmp(vtype, type, &sametype));
1308           }
1309         }
1310       }
1311     }
1312     if (sametype) { /* propagate vectype */
1313       PetscCall(MatSetVecType(A, vtype));
1314     }
1315   }
1316 
1317   PetscCall(MatSetUp_NestIS_Private(A, nr, is_row, nc, is_col));
1318 
1319   PetscCall(PetscMalloc1(nr, &s->row_len));
1320   PetscCall(PetscMalloc1(nc, &s->col_len));
1321   for (i = 0; i < nr; i++) s->row_len[i] = -1;
1322   for (j = 0; j < nc; j++) s->col_len[j] = -1;
1323 
1324   PetscCall(PetscCalloc1(nr * nc, &s->nnzstate));
1325   for (i = 0; i < nr; i++) {
1326     for (j = 0; j < nc; j++) {
1327       if (s->m[i][j]) PetscCall(MatGetNonzeroState(s->m[i][j], &s->nnzstate[i * nc + j]));
1328     }
1329   }
1330 
1331   PetscCall(MatNestGetSizes_Private(A, &m, &n, &M, &N));
1332 
1333   PetscCall(PetscLayoutSetSize(A->rmap, M));
1334   PetscCall(PetscLayoutSetLocalSize(A->rmap, m));
1335   PetscCall(PetscLayoutSetSize(A->cmap, N));
1336   PetscCall(PetscLayoutSetLocalSize(A->cmap, n));
1337 
1338   PetscCall(PetscLayoutSetUp(A->rmap));
1339   PetscCall(PetscLayoutSetUp(A->cmap));
1340 
1341   /* disable operations that are not supported for non-square matrices,
1342      or matrices for which is_row != is_col  */
1343   PetscCall(MatHasCongruentLayouts(A, &cong));
1344   if (cong && nr != nc) cong = PETSC_FALSE;
1345   if (cong) {
1346     for (i = 0; cong && i < nr; i++) PetscCall(ISEqualUnsorted(s->isglobal.row[i], s->isglobal.col[i], &cong));
1347   }
1348   if (!cong) {
1349     A->ops->missingdiagonal = NULL;
1350     A->ops->getdiagonal     = NULL;
1351     A->ops->shift           = NULL;
1352     A->ops->diagonalset     = NULL;
1353   }
1354 
1355   PetscCall(PetscCalloc2(nr, &s->left, nc, &s->right));
1356   PetscCall(PetscObjectStateIncrease((PetscObject)A));
1357   A->nonzerostate++;
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 /*@
1362    MatNestSetSubMats - Sets the nested submatrices in a `MATNEST`
1363 
1364    Collective on A
1365 
1366    Input Parameters:
1367 +  A - `MATNEST` matrix
1368 .  nr - number of nested row blocks
1369 .  is_row - index sets for each nested row block, or NULL to make contiguous
1370 .  nc - number of nested column blocks
1371 .  is_col - index sets for each nested column block, or NULL to make contiguous
1372 -  a - row-aligned array of nr*nc submatrices, empty submatrices can be passed using NULL
1373 
1374    Note:
1375    This always resets any submatrix information previously set
1376 
1377    Level: advanced
1378 
1379 .seealso: `MATNEST`, `MatCreateNest()`, `MATNEST`, `MatNestSetSubMat()`, `MatNestGetSubMat()`, `MatNestGetSubMats()`
1380 @*/
1381 PetscErrorCode MatNestSetSubMats(Mat A, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[], const Mat a[]) {
1382   PetscInt i;
1383 
1384   PetscFunctionBegin;
1385   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
1386   PetscCheck(nr >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Number of rows cannot be negative");
1387   if (nr && is_row) {
1388     PetscValidPointer(is_row, 3);
1389     for (i = 0; i < nr; i++) PetscValidHeaderSpecific(is_row[i], IS_CLASSID, 3);
1390   }
1391   PetscCheck(nc >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "Number of columns cannot be negative");
1392   if (nc && is_col) {
1393     PetscValidPointer(is_col, 5);
1394     for (i = 0; i < nc; i++) PetscValidHeaderSpecific(is_col[i], IS_CLASSID, 5);
1395   }
1396   if (nr * nc > 0) PetscValidPointer(a, 6);
1397   PetscUseMethod(A, "MatNestSetSubMats_C", (Mat, PetscInt, const IS[], PetscInt, const IS[], const Mat[]), (A, nr, is_row, nc, is_col, a));
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 static PetscErrorCode MatNestCreateAggregateL2G_Private(Mat A, PetscInt n, const IS islocal[], const IS isglobal[], PetscBool colflg, ISLocalToGlobalMapping *ltog) {
1402   PetscBool flg;
1403   PetscInt  i, j, m, mi, *ix;
1404 
1405   PetscFunctionBegin;
1406   *ltog = NULL;
1407   for (i = 0, m = 0, flg = PETSC_FALSE; i < n; i++) {
1408     if (islocal[i]) {
1409       PetscCall(ISGetLocalSize(islocal[i], &mi));
1410       flg = PETSC_TRUE; /* We found a non-trivial entry */
1411     } else {
1412       PetscCall(ISGetLocalSize(isglobal[i], &mi));
1413     }
1414     m += mi;
1415   }
1416   if (!flg) PetscFunctionReturn(0);
1417 
1418   PetscCall(PetscMalloc1(m, &ix));
1419   for (i = 0, m = 0; i < n; i++) {
1420     ISLocalToGlobalMapping smap = NULL;
1421     Mat                    sub  = NULL;
1422     PetscSF                sf;
1423     PetscLayout            map;
1424     const PetscInt        *ix2;
1425 
1426     if (!colflg) {
1427       PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1428     } else {
1429       PetscCall(MatNestFindNonzeroSubMatCol(A, i, &sub));
1430     }
1431     if (sub) {
1432       if (!colflg) {
1433         PetscCall(MatGetLocalToGlobalMapping(sub, &smap, NULL));
1434       } else {
1435         PetscCall(MatGetLocalToGlobalMapping(sub, NULL, &smap));
1436       }
1437     }
1438     /*
1439        Now we need to extract the monolithic global indices that correspond to the given split global indices.
1440        In many/most cases, we only want MatGetLocalSubMatrix() to work, in which case we only need to know the size of the local spaces.
1441     */
1442     PetscCall(ISGetIndices(isglobal[i], &ix2));
1443     if (islocal[i]) {
1444       PetscInt *ilocal, *iremote;
1445       PetscInt  mil, nleaves;
1446 
1447       PetscCall(ISGetLocalSize(islocal[i], &mi));
1448       PetscCheck(smap, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "Missing local to global map");
1449       for (j = 0; j < mi; j++) ix[m + j] = j;
1450       PetscCall(ISLocalToGlobalMappingApply(smap, mi, ix + m, ix + m));
1451 
1452       /* PetscSFSetGraphLayout does not like negative indices */
1453       PetscCall(PetscMalloc2(mi, &ilocal, mi, &iremote));
1454       for (j = 0, nleaves = 0; j < mi; j++) {
1455         if (ix[m + j] < 0) continue;
1456         ilocal[nleaves]  = j;
1457         iremote[nleaves] = ix[m + j];
1458         nleaves++;
1459       }
1460       PetscCall(ISGetLocalSize(isglobal[i], &mil));
1461       PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
1462       PetscCall(PetscLayoutCreate(PetscObjectComm((PetscObject)A), &map));
1463       PetscCall(PetscLayoutSetLocalSize(map, mil));
1464       PetscCall(PetscLayoutSetUp(map));
1465       PetscCall(PetscSFSetGraphLayout(sf, map, nleaves, ilocal, PETSC_USE_POINTER, iremote));
1466       PetscCall(PetscLayoutDestroy(&map));
1467       PetscCall(PetscSFBcastBegin(sf, MPIU_INT, ix2, ix + m, MPI_REPLACE));
1468       PetscCall(PetscSFBcastEnd(sf, MPIU_INT, ix2, ix + m, MPI_REPLACE));
1469       PetscCall(PetscSFDestroy(&sf));
1470       PetscCall(PetscFree2(ilocal, iremote));
1471     } else {
1472       PetscCall(ISGetLocalSize(isglobal[i], &mi));
1473       for (j = 0; j < mi; j++) ix[m + j] = ix2[i];
1474     }
1475     PetscCall(ISRestoreIndices(isglobal[i], &ix2));
1476     m += mi;
1477   }
1478   PetscCall(ISLocalToGlobalMappingCreate(PetscObjectComm((PetscObject)A), 1, m, ix, PETSC_OWN_POINTER, ltog));
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 /* If an IS was provided, there is nothing Nest needs to do, otherwise Nest will build a strided IS */
1483 /*
1484   nprocessors = NP
1485   Nest x^T = ((g_0,g_1,...g_nprocs-1), (h_0,h_1,...h_NP-1))
1486        proc 0: => (g_0,h_0,)
1487        proc 1: => (g_1,h_1,)
1488        ...
1489        proc nprocs-1: => (g_NP-1,h_NP-1,)
1490 
1491             proc 0:                      proc 1:                    proc nprocs-1:
1492     is[0] = (0,1,2,...,nlocal(g_0)-1)  (0,1,...,nlocal(g_1)-1)  (0,1,...,nlocal(g_NP-1))
1493 
1494             proc 0:
1495     is[1] = (nlocal(g_0),nlocal(g_0)+1,...,nlocal(g_0)+nlocal(h_0)-1)
1496             proc 1:
1497     is[1] = (nlocal(g_1),nlocal(g_1)+1,...,nlocal(g_1)+nlocal(h_1)-1)
1498 
1499             proc NP-1:
1500     is[1] = (nlocal(g_NP-1),nlocal(g_NP-1)+1,...,nlocal(g_NP-1)+nlocal(h_NP-1)-1)
1501 */
1502 static PetscErrorCode MatSetUp_NestIS_Private(Mat A, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[]) {
1503   Mat_Nest *vs = (Mat_Nest *)A->data;
1504   PetscInt  i, j, offset, n, nsum, bs;
1505   Mat       sub = NULL;
1506 
1507   PetscFunctionBegin;
1508   PetscCall(PetscMalloc1(nr, &vs->isglobal.row));
1509   PetscCall(PetscMalloc1(nc, &vs->isglobal.col));
1510   if (is_row) { /* valid IS is passed in */
1511     /* refs on is[] are incremented */
1512     for (i = 0; i < vs->nr; i++) {
1513       PetscCall(PetscObjectReference((PetscObject)is_row[i]));
1514 
1515       vs->isglobal.row[i] = is_row[i];
1516     }
1517   } else { /* Create the ISs by inspecting sizes of a submatrix in each row */
1518     nsum = 0;
1519     for (i = 0; i < vs->nr; i++) { /* Add up the local sizes to compute the aggregate offset */
1520       PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1521       PetscCheck(sub, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "No nonzero submatrix in row %" PetscInt_FMT, i);
1522       PetscCall(MatGetLocalSize(sub, &n, NULL));
1523       PetscCheck(n >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Sizes have not yet been set for submatrix");
1524       nsum += n;
1525     }
1526     PetscCallMPI(MPI_Scan(&nsum, &offset, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)A)));
1527     offset -= nsum;
1528     for (i = 0; i < vs->nr; i++) {
1529       PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1530       PetscCall(MatGetLocalSize(sub, &n, NULL));
1531       PetscCall(MatGetBlockSizes(sub, &bs, NULL));
1532       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub), n, offset, 1, &vs->isglobal.row[i]));
1533       PetscCall(ISSetBlockSize(vs->isglobal.row[i], bs));
1534       offset += n;
1535     }
1536   }
1537 
1538   if (is_col) { /* valid IS is passed in */
1539     /* refs on is[] are incremented */
1540     for (j = 0; j < vs->nc; j++) {
1541       PetscCall(PetscObjectReference((PetscObject)is_col[j]));
1542 
1543       vs->isglobal.col[j] = is_col[j];
1544     }
1545   } else { /* Create the ISs by inspecting sizes of a submatrix in each column */
1546     offset = A->cmap->rstart;
1547     nsum   = 0;
1548     for (j = 0; j < vs->nc; j++) {
1549       PetscCall(MatNestFindNonzeroSubMatCol(A, j, &sub));
1550       PetscCheck(sub, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "No nonzero submatrix in column %" PetscInt_FMT, i);
1551       PetscCall(MatGetLocalSize(sub, NULL, &n));
1552       PetscCheck(n >= 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Sizes have not yet been set for submatrix");
1553       nsum += n;
1554     }
1555     PetscCallMPI(MPI_Scan(&nsum, &offset, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)A)));
1556     offset -= nsum;
1557     for (j = 0; j < vs->nc; j++) {
1558       PetscCall(MatNestFindNonzeroSubMatCol(A, j, &sub));
1559       PetscCall(MatGetLocalSize(sub, NULL, &n));
1560       PetscCall(MatGetBlockSizes(sub, NULL, &bs));
1561       PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub), n, offset, 1, &vs->isglobal.col[j]));
1562       PetscCall(ISSetBlockSize(vs->isglobal.col[j], bs));
1563       offset += n;
1564     }
1565   }
1566 
1567   /* Set up the local ISs */
1568   PetscCall(PetscMalloc1(vs->nr, &vs->islocal.row));
1569   PetscCall(PetscMalloc1(vs->nc, &vs->islocal.col));
1570   for (i = 0, offset = 0; i < vs->nr; i++) {
1571     IS                     isloc;
1572     ISLocalToGlobalMapping rmap = NULL;
1573     PetscInt               nlocal, bs;
1574     PetscCall(MatNestFindNonzeroSubMatRow(A, i, &sub));
1575     if (sub) PetscCall(MatGetLocalToGlobalMapping(sub, &rmap, NULL));
1576     if (rmap) {
1577       PetscCall(MatGetBlockSizes(sub, &bs, NULL));
1578       PetscCall(ISLocalToGlobalMappingGetSize(rmap, &nlocal));
1579       PetscCall(ISCreateStride(PETSC_COMM_SELF, nlocal, offset, 1, &isloc));
1580       PetscCall(ISSetBlockSize(isloc, bs));
1581     } else {
1582       nlocal = 0;
1583       isloc  = NULL;
1584     }
1585     vs->islocal.row[i] = isloc;
1586     offset += nlocal;
1587   }
1588   for (i = 0, offset = 0; i < vs->nc; i++) {
1589     IS                     isloc;
1590     ISLocalToGlobalMapping cmap = NULL;
1591     PetscInt               nlocal, bs;
1592     PetscCall(MatNestFindNonzeroSubMatCol(A, i, &sub));
1593     if (sub) PetscCall(MatGetLocalToGlobalMapping(sub, NULL, &cmap));
1594     if (cmap) {
1595       PetscCall(MatGetBlockSizes(sub, NULL, &bs));
1596       PetscCall(ISLocalToGlobalMappingGetSize(cmap, &nlocal));
1597       PetscCall(ISCreateStride(PETSC_COMM_SELF, nlocal, offset, 1, &isloc));
1598       PetscCall(ISSetBlockSize(isloc, bs));
1599     } else {
1600       nlocal = 0;
1601       isloc  = NULL;
1602     }
1603     vs->islocal.col[i] = isloc;
1604     offset += nlocal;
1605   }
1606 
1607   /* Set up the aggregate ISLocalToGlobalMapping */
1608   {
1609     ISLocalToGlobalMapping rmap, cmap;
1610     PetscCall(MatNestCreateAggregateL2G_Private(A, vs->nr, vs->islocal.row, vs->isglobal.row, PETSC_FALSE, &rmap));
1611     PetscCall(MatNestCreateAggregateL2G_Private(A, vs->nc, vs->islocal.col, vs->isglobal.col, PETSC_TRUE, &cmap));
1612     if (rmap && cmap) PetscCall(MatSetLocalToGlobalMapping(A, rmap, cmap));
1613     PetscCall(ISLocalToGlobalMappingDestroy(&rmap));
1614     PetscCall(ISLocalToGlobalMappingDestroy(&cmap));
1615   }
1616 
1617   if (PetscDefined(USE_DEBUG)) {
1618     for (i = 0; i < vs->nr; i++) {
1619       for (j = 0; j < vs->nc; j++) {
1620         PetscInt m, n, M, N, mi, ni, Mi, Ni;
1621         Mat      B = vs->m[i][j];
1622         if (!B) continue;
1623         PetscCall(MatGetSize(B, &M, &N));
1624         PetscCall(MatGetLocalSize(B, &m, &n));
1625         PetscCall(ISGetSize(vs->isglobal.row[i], &Mi));
1626         PetscCall(ISGetSize(vs->isglobal.col[j], &Ni));
1627         PetscCall(ISGetLocalSize(vs->isglobal.row[i], &mi));
1628         PetscCall(ISGetLocalSize(vs->isglobal.col[j], &ni));
1629         PetscCheck(M == Mi && N == Ni, PetscObjectComm((PetscObject)sub), PETSC_ERR_ARG_INCOMP, "Global sizes (%" PetscInt_FMT ",%" PetscInt_FMT ") of nested submatrix (%" PetscInt_FMT ",%" PetscInt_FMT ") do not agree with space defined by index sets (%" PetscInt_FMT ",%" PetscInt_FMT ")", M, N, i, j, Mi, Ni);
1630         PetscCheck(m == mi && n == ni, PetscObjectComm((PetscObject)sub), PETSC_ERR_ARG_INCOMP, "Local sizes (%" PetscInt_FMT ",%" PetscInt_FMT ") of nested submatrix (%" PetscInt_FMT ",%" PetscInt_FMT ") do not agree with space defined by index sets (%" PetscInt_FMT ",%" PetscInt_FMT ")", m, n, i, j, mi, ni);
1631       }
1632     }
1633   }
1634 
1635   /* Set A->assembled if all non-null blocks are currently assembled */
1636   for (i = 0; i < vs->nr; i++) {
1637     for (j = 0; j < vs->nc; j++) {
1638       if (vs->m[i][j] && !vs->m[i][j]->assembled) PetscFunctionReturn(0);
1639     }
1640   }
1641   A->assembled = PETSC_TRUE;
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 /*@C
1646    MatCreateNest - Creates a new `MATNEST` matrix containing several nested submatrices, each stored separately
1647 
1648    Collective
1649 
1650    Input Parameters:
1651 +  comm - Communicator for the new `MATNEST`
1652 .  nr - number of nested row blocks
1653 .  is_row - index sets for each nested row block, or NULL to make contiguous
1654 .  nc - number of nested column blocks
1655 .  is_col - index sets for each nested column block, or NULL to make contiguous
1656 -  a - row-aligned array of nr*nc submatrices, empty submatrices can be passed using NULL
1657 
1658    Output Parameter:
1659 .  B - new matrix
1660 
1661    Level: advanced
1662 
1663 .seealso: `MATNEST`, `MatCreate()`, `VecCreateNest()`, `DMCreateMatrix()`, `MATNEST`, `MatNestSetSubMat()`,
1664           `MatNestGetSubMat()`, `MatNestGetLocalISs()`, `MatNestGetSize()`,
1665           `MatNestGetISs()`, `MatNestSetSubMats()`, `MatNestGetSubMats()`
1666 @*/
1667 PetscErrorCode MatCreateNest(MPI_Comm comm, PetscInt nr, const IS is_row[], PetscInt nc, const IS is_col[], const Mat a[], Mat *B) {
1668   Mat A;
1669 
1670   PetscFunctionBegin;
1671   *B = NULL;
1672   PetscCall(MatCreate(comm, &A));
1673   PetscCall(MatSetType(A, MATNEST));
1674   A->preallocated = PETSC_TRUE;
1675   PetscCall(MatNestSetSubMats(A, nr, is_row, nc, is_col, a));
1676   *B = A;
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 PetscErrorCode MatConvert_Nest_SeqAIJ_fast(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1681   Mat_Nest     *nest = (Mat_Nest *)A->data;
1682   Mat          *trans;
1683   PetscScalar **avv;
1684   PetscScalar  *vv;
1685   PetscInt    **aii, **ajj;
1686   PetscInt     *ii, *jj, *ci;
1687   PetscInt      nr, nc, nnz, i, j;
1688   PetscBool     done;
1689 
1690   PetscFunctionBegin;
1691   PetscCall(MatGetSize(A, &nr, &nc));
1692   if (reuse == MAT_REUSE_MATRIX) {
1693     PetscInt rnr;
1694 
1695     PetscCall(MatGetRowIJ(*newmat, 0, PETSC_FALSE, PETSC_FALSE, &rnr, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
1696     PetscCheck(done, PetscObjectComm((PetscObject)A), PETSC_ERR_PLIB, "MatGetRowIJ");
1697     PetscCheck(rnr == nr, PetscObjectComm((PetscObject)A), PETSC_ERR_USER, "Cannot reuse matrix, wrong number of rows");
1698     PetscCall(MatSeqAIJGetArray(*newmat, &vv));
1699   }
1700   /* extract CSR for nested SeqAIJ matrices */
1701   nnz = 0;
1702   PetscCall(PetscCalloc4(nest->nr * nest->nc, &aii, nest->nr * nest->nc, &ajj, nest->nr * nest->nc, &avv, nest->nr * nest->nc, &trans));
1703   for (i = 0; i < nest->nr; ++i) {
1704     for (j = 0; j < nest->nc; ++j) {
1705       Mat B = nest->m[i][j];
1706       if (B) {
1707         PetscScalar *naa;
1708         PetscInt    *nii, *njj, nnr;
1709         PetscBool    istrans;
1710 
1711         PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSE, &istrans));
1712         if (istrans) {
1713           Mat Bt;
1714 
1715           PetscCall(MatTransposeGetMat(B, &Bt));
1716           PetscCall(MatTranspose(Bt, MAT_INITIAL_MATRIX, &trans[i * nest->nc + j]));
1717           B = trans[i * nest->nc + j];
1718         }
1719         PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &nnr, (const PetscInt **)&nii, (const PetscInt **)&njj, &done));
1720         PetscCheck(done, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "MatGetRowIJ");
1721         PetscCall(MatSeqAIJGetArray(B, &naa));
1722         nnz += nii[nnr];
1723 
1724         aii[i * nest->nc + j] = nii;
1725         ajj[i * nest->nc + j] = njj;
1726         avv[i * nest->nc + j] = naa;
1727       }
1728     }
1729   }
1730   if (reuse != MAT_REUSE_MATRIX) {
1731     PetscCall(PetscMalloc1(nr + 1, &ii));
1732     PetscCall(PetscMalloc1(nnz, &jj));
1733     PetscCall(PetscMalloc1(nnz, &vv));
1734   } else {
1735     PetscCheck(nnz == ii[nr], PetscObjectComm((PetscObject)A), PETSC_ERR_USER, "Cannot reuse matrix, wrong number of nonzeros");
1736   }
1737 
1738   /* new row pointer */
1739   PetscCall(PetscArrayzero(ii, nr + 1));
1740   for (i = 0; i < nest->nr; ++i) {
1741     PetscInt ncr, rst;
1742 
1743     PetscCall(ISStrideGetInfo(nest->isglobal.row[i], &rst, NULL));
1744     PetscCall(ISGetLocalSize(nest->isglobal.row[i], &ncr));
1745     for (j = 0; j < nest->nc; ++j) {
1746       if (aii[i * nest->nc + j]) {
1747         PetscInt *nii = aii[i * nest->nc + j];
1748         PetscInt  ir;
1749 
1750         for (ir = rst; ir < ncr + rst; ++ir) {
1751           ii[ir + 1] += nii[1] - nii[0];
1752           nii++;
1753         }
1754       }
1755     }
1756   }
1757   for (i = 0; i < nr; i++) ii[i + 1] += ii[i];
1758 
1759   /* construct CSR for the new matrix */
1760   PetscCall(PetscCalloc1(nr, &ci));
1761   for (i = 0; i < nest->nr; ++i) {
1762     PetscInt ncr, rst;
1763 
1764     PetscCall(ISStrideGetInfo(nest->isglobal.row[i], &rst, NULL));
1765     PetscCall(ISGetLocalSize(nest->isglobal.row[i], &ncr));
1766     for (j = 0; j < nest->nc; ++j) {
1767       if (aii[i * nest->nc + j]) {
1768         PetscScalar *nvv = avv[i * nest->nc + j];
1769         PetscInt    *nii = aii[i * nest->nc + j];
1770         PetscInt    *njj = ajj[i * nest->nc + j];
1771         PetscInt     ir, cst;
1772 
1773         PetscCall(ISStrideGetInfo(nest->isglobal.col[j], &cst, NULL));
1774         for (ir = rst; ir < ncr + rst; ++ir) {
1775           PetscInt ij, rsize = nii[1] - nii[0], ist = ii[ir] + ci[ir];
1776 
1777           for (ij = 0; ij < rsize; ij++) {
1778             jj[ist + ij] = *njj + cst;
1779             vv[ist + ij] = *nvv;
1780             njj++;
1781             nvv++;
1782           }
1783           ci[ir] += rsize;
1784           nii++;
1785         }
1786       }
1787     }
1788   }
1789   PetscCall(PetscFree(ci));
1790 
1791   /* restore info */
1792   for (i = 0; i < nest->nr; ++i) {
1793     for (j = 0; j < nest->nc; ++j) {
1794       Mat B = nest->m[i][j];
1795       if (B) {
1796         PetscInt nnr = 0, k = i * nest->nc + j;
1797 
1798         B = (trans[k] ? trans[k] : B);
1799         PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &nnr, (const PetscInt **)&aii[k], (const PetscInt **)&ajj[k], &done));
1800         PetscCheck(done, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "MatRestoreRowIJ");
1801         PetscCall(MatSeqAIJRestoreArray(B, &avv[k]));
1802         PetscCall(MatDestroy(&trans[k]));
1803       }
1804     }
1805   }
1806   PetscCall(PetscFree4(aii, ajj, avv, trans));
1807 
1808   /* finalize newmat */
1809   if (reuse == MAT_INITIAL_MATRIX) {
1810     PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), nr, nc, ii, jj, vv, newmat));
1811   } else if (reuse == MAT_INPLACE_MATRIX) {
1812     Mat B;
1813 
1814     PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), nr, nc, ii, jj, vv, &B));
1815     PetscCall(MatHeaderReplace(A, &B));
1816   }
1817   PetscCall(MatAssemblyBegin(*newmat, MAT_FINAL_ASSEMBLY));
1818   PetscCall(MatAssemblyEnd(*newmat, MAT_FINAL_ASSEMBLY));
1819   {
1820     Mat_SeqAIJ *a = (Mat_SeqAIJ *)((*newmat)->data);
1821     a->free_a     = PETSC_TRUE;
1822     a->free_ij    = PETSC_TRUE;
1823   }
1824   PetscFunctionReturn(0);
1825 }
1826 
1827 PETSC_INTERN PetscErrorCode MatAXPY_Dense_Nest(Mat Y, PetscScalar a, Mat X) {
1828   Mat_Nest *nest = (Mat_Nest *)X->data;
1829   PetscInt  i, j, k, rstart;
1830   PetscBool flg;
1831 
1832   PetscFunctionBegin;
1833   /* Fill by row */
1834   for (j = 0; j < nest->nc; ++j) {
1835     /* Using global column indices and ISAllGather() is not scalable. */
1836     IS              bNis;
1837     PetscInt        bN;
1838     const PetscInt *bNindices;
1839     PetscCall(ISAllGather(nest->isglobal.col[j], &bNis));
1840     PetscCall(ISGetSize(bNis, &bN));
1841     PetscCall(ISGetIndices(bNis, &bNindices));
1842     for (i = 0; i < nest->nr; ++i) {
1843       Mat             B = nest->m[i][j], D = NULL;
1844       PetscInt        bm, br;
1845       const PetscInt *bmindices;
1846       if (!B) continue;
1847       PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSE, &flg));
1848       if (flg) {
1849         PetscTryMethod(B, "MatTransposeGetMat_C", (Mat, Mat *), (B, &D));
1850         PetscTryMethod(B, "MatHermitianTransposeGetMat_C", (Mat, Mat *), (B, &D));
1851         PetscCall(MatConvert(B, ((PetscObject)D)->type_name, MAT_INITIAL_MATRIX, &D));
1852         B = D;
1853       }
1854       PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQSBAIJ, MATMPISBAIJ, ""));
1855       if (flg) {
1856         if (D) PetscCall(MatConvert(D, MATBAIJ, MAT_INPLACE_MATRIX, &D));
1857         else PetscCall(MatConvert(B, MATBAIJ, MAT_INITIAL_MATRIX, &D));
1858         B = D;
1859       }
1860       PetscCall(ISGetLocalSize(nest->isglobal.row[i], &bm));
1861       PetscCall(ISGetIndices(nest->isglobal.row[i], &bmindices));
1862       PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
1863       for (br = 0; br < bm; ++br) {
1864         PetscInt           row = bmindices[br], brncols, *cols;
1865         const PetscInt    *brcols;
1866         const PetscScalar *brcoldata;
1867         PetscScalar       *vals = NULL;
1868         PetscCall(MatGetRow(B, br + rstart, &brncols, &brcols, &brcoldata));
1869         PetscCall(PetscMalloc1(brncols, &cols));
1870         for (k = 0; k < brncols; k++) cols[k] = bNindices[brcols[k]];
1871         /*
1872           Nest blocks are required to be nonoverlapping -- otherwise nest and monolithic index layouts wouldn't match.
1873           Thus, we could use INSERT_VALUES, but I prefer ADD_VALUES.
1874          */
1875         if (a != 1.0) {
1876           PetscCall(PetscMalloc1(brncols, &vals));
1877           for (k = 0; k < brncols; k++) vals[k] = a * brcoldata[k];
1878           PetscCall(MatSetValues(Y, 1, &row, brncols, cols, vals, ADD_VALUES));
1879           PetscCall(PetscFree(vals));
1880         } else {
1881           PetscCall(MatSetValues(Y, 1, &row, brncols, cols, brcoldata, ADD_VALUES));
1882         }
1883         PetscCall(MatRestoreRow(B, br + rstart, &brncols, &brcols, &brcoldata));
1884         PetscCall(PetscFree(cols));
1885       }
1886       if (D) PetscCall(MatDestroy(&D));
1887       PetscCall(ISRestoreIndices(nest->isglobal.row[i], &bmindices));
1888     }
1889     PetscCall(ISRestoreIndices(bNis, &bNindices));
1890     PetscCall(ISDestroy(&bNis));
1891   }
1892   PetscCall(MatAssemblyBegin(Y, MAT_FINAL_ASSEMBLY));
1893   PetscCall(MatAssemblyEnd(Y, MAT_FINAL_ASSEMBLY));
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 PetscErrorCode MatConvert_Nest_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
1898   Mat_Nest   *nest = (Mat_Nest *)A->data;
1899   PetscInt    m, n, M, N, i, j, k, *dnnz, *onnz, rstart, cstart, cend;
1900   PetscMPIInt size;
1901   Mat         C;
1902 
1903   PetscFunctionBegin;
1904   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
1905   if (size == 1) { /* look for a special case with SeqAIJ matrices and strided-1, contiguous, blocks */
1906     PetscInt  nf;
1907     PetscBool fast;
1908 
1909     PetscCall(PetscStrcmp(newtype, MATAIJ, &fast));
1910     if (!fast) PetscCall(PetscStrcmp(newtype, MATSEQAIJ, &fast));
1911     for (i = 0; i < nest->nr && fast; ++i) {
1912       for (j = 0; j < nest->nc && fast; ++j) {
1913         Mat B = nest->m[i][j];
1914         if (B) {
1915           PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &fast));
1916           if (!fast) {
1917             PetscBool istrans;
1918 
1919             PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSE, &istrans));
1920             if (istrans) {
1921               Mat Bt;
1922 
1923               PetscCall(MatTransposeGetMat(B, &Bt));
1924               PetscCall(PetscObjectTypeCompare((PetscObject)Bt, MATSEQAIJ, &fast));
1925             }
1926           }
1927         }
1928       }
1929     }
1930     for (i = 0, nf = 0; i < nest->nr && fast; ++i) {
1931       PetscCall(PetscObjectTypeCompare((PetscObject)nest->isglobal.row[i], ISSTRIDE, &fast));
1932       if (fast) {
1933         PetscInt f, s;
1934 
1935         PetscCall(ISStrideGetInfo(nest->isglobal.row[i], &f, &s));
1936         if (f != nf || s != 1) {
1937           fast = PETSC_FALSE;
1938         } else {
1939           PetscCall(ISGetSize(nest->isglobal.row[i], &f));
1940           nf += f;
1941         }
1942       }
1943     }
1944     for (i = 0, nf = 0; i < nest->nc && fast; ++i) {
1945       PetscCall(PetscObjectTypeCompare((PetscObject)nest->isglobal.col[i], ISSTRIDE, &fast));
1946       if (fast) {
1947         PetscInt f, s;
1948 
1949         PetscCall(ISStrideGetInfo(nest->isglobal.col[i], &f, &s));
1950         if (f != nf || s != 1) {
1951           fast = PETSC_FALSE;
1952         } else {
1953           PetscCall(ISGetSize(nest->isglobal.col[i], &f));
1954           nf += f;
1955         }
1956       }
1957     }
1958     if (fast) {
1959       PetscCall(MatConvert_Nest_SeqAIJ_fast(A, newtype, reuse, newmat));
1960       PetscFunctionReturn(0);
1961     }
1962   }
1963   PetscCall(MatGetSize(A, &M, &N));
1964   PetscCall(MatGetLocalSize(A, &m, &n));
1965   PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend));
1966   if (reuse == MAT_REUSE_MATRIX) C = *newmat;
1967   else {
1968     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
1969     PetscCall(MatSetType(C, newtype));
1970     PetscCall(MatSetSizes(C, m, n, M, N));
1971   }
1972   PetscCall(PetscMalloc1(2 * m, &dnnz));
1973   onnz = dnnz + m;
1974   for (k = 0; k < m; k++) {
1975     dnnz[k] = 0;
1976     onnz[k] = 0;
1977   }
1978   for (j = 0; j < nest->nc; ++j) {
1979     IS              bNis;
1980     PetscInt        bN;
1981     const PetscInt *bNindices;
1982     PetscBool       flg;
1983     /* Using global column indices and ISAllGather() is not scalable. */
1984     PetscCall(ISAllGather(nest->isglobal.col[j], &bNis));
1985     PetscCall(ISGetSize(bNis, &bN));
1986     PetscCall(ISGetIndices(bNis, &bNindices));
1987     for (i = 0; i < nest->nr; ++i) {
1988       PetscSF         bmsf;
1989       PetscSFNode    *iremote;
1990       Mat             B = nest->m[i][j], D = NULL;
1991       PetscInt        bm, *sub_dnnz, *sub_onnz, br;
1992       const PetscInt *bmindices;
1993       if (!B) continue;
1994       PetscCall(ISGetLocalSize(nest->isglobal.row[i], &bm));
1995       PetscCall(ISGetIndices(nest->isglobal.row[i], &bmindices));
1996       PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &bmsf));
1997       PetscCall(PetscMalloc1(bm, &iremote));
1998       PetscCall(PetscMalloc1(bm, &sub_dnnz));
1999       PetscCall(PetscMalloc1(bm, &sub_onnz));
2000       for (k = 0; k < bm; ++k) {
2001         sub_dnnz[k] = 0;
2002         sub_onnz[k] = 0;
2003       }
2004       PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSE, &flg));
2005       if (flg) {
2006         PetscTryMethod(B, "MatTransposeGetMat_C", (Mat, Mat *), (B, &D));
2007         PetscTryMethod(B, "MatHermitianTransposeGetMat_C", (Mat, Mat *), (B, &D));
2008         PetscCall(MatConvert(B, ((PetscObject)D)->type_name, MAT_INITIAL_MATRIX, &D));
2009         B = D;
2010       }
2011       PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQSBAIJ, MATMPISBAIJ, ""));
2012       if (flg) {
2013         if (D) PetscCall(MatConvert(D, MATBAIJ, MAT_INPLACE_MATRIX, &D));
2014         else PetscCall(MatConvert(B, MATBAIJ, MAT_INITIAL_MATRIX, &D));
2015         B = D;
2016       }
2017       /*
2018        Locate the owners for all of the locally-owned global row indices for this row block.
2019        These determine the roots of PetscSF used to communicate preallocation data to row owners.
2020        The roots correspond to the dnnz and onnz entries; thus, there are two roots per row.
2021        */
2022       PetscCall(MatGetOwnershipRange(B, &rstart, NULL));
2023       for (br = 0; br < bm; ++br) {
2024         PetscInt        row = bmindices[br], brncols, col;
2025         const PetscInt *brcols;
2026         PetscInt        rowrel   = 0; /* row's relative index on its owner rank */
2027         PetscMPIInt     rowowner = 0;
2028         PetscCall(PetscLayoutFindOwnerIndex(A->rmap, row, &rowowner, &rowrel));
2029         /* how many roots  */
2030         iremote[br].rank  = rowowner;
2031         iremote[br].index = rowrel; /* edge from bmdnnz to dnnz */
2032         /* get nonzero pattern */
2033         PetscCall(MatGetRow(B, br + rstart, &brncols, &brcols, NULL));
2034         for (k = 0; k < brncols; k++) {
2035           col = bNindices[brcols[k]];
2036           if (col >= A->cmap->range[rowowner] && col < A->cmap->range[rowowner + 1]) {
2037             sub_dnnz[br]++;
2038           } else {
2039             sub_onnz[br]++;
2040           }
2041         }
2042         PetscCall(MatRestoreRow(B, br + rstart, &brncols, &brcols, NULL));
2043       }
2044       if (D) PetscCall(MatDestroy(&D));
2045       PetscCall(ISRestoreIndices(nest->isglobal.row[i], &bmindices));
2046       /* bsf will have to take care of disposing of bedges. */
2047       PetscCall(PetscSFSetGraph(bmsf, m, bm, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
2048       PetscCall(PetscSFReduceBegin(bmsf, MPIU_INT, sub_dnnz, dnnz, MPI_SUM));
2049       PetscCall(PetscSFReduceEnd(bmsf, MPIU_INT, sub_dnnz, dnnz, MPI_SUM));
2050       PetscCall(PetscSFReduceBegin(bmsf, MPIU_INT, sub_onnz, onnz, MPI_SUM));
2051       PetscCall(PetscSFReduceEnd(bmsf, MPIU_INT, sub_onnz, onnz, MPI_SUM));
2052       PetscCall(PetscFree(sub_dnnz));
2053       PetscCall(PetscFree(sub_onnz));
2054       PetscCall(PetscSFDestroy(&bmsf));
2055     }
2056     PetscCall(ISRestoreIndices(bNis, &bNindices));
2057     PetscCall(ISDestroy(&bNis));
2058   }
2059   /* Resize preallocation if overestimated */
2060   for (i = 0; i < m; i++) {
2061     dnnz[i] = PetscMin(dnnz[i], A->cmap->n);
2062     onnz[i] = PetscMin(onnz[i], A->cmap->N - A->cmap->n);
2063   }
2064   PetscCall(MatSeqAIJSetPreallocation(C, 0, dnnz));
2065   PetscCall(MatMPIAIJSetPreallocation(C, 0, dnnz, 0, onnz));
2066   PetscCall(PetscFree(dnnz));
2067   PetscCall(MatAXPY_Dense_Nest(C, 1.0, A));
2068   if (reuse == MAT_INPLACE_MATRIX) {
2069     PetscCall(MatHeaderReplace(A, &C));
2070   } else *newmat = C;
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 PetscErrorCode MatConvert_Nest_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
2075   Mat      B;
2076   PetscInt m, n, M, N;
2077 
2078   PetscFunctionBegin;
2079   PetscCall(MatGetSize(A, &M, &N));
2080   PetscCall(MatGetLocalSize(A, &m, &n));
2081   if (reuse == MAT_REUSE_MATRIX) {
2082     B = *newmat;
2083     PetscCall(MatZeroEntries(B));
2084   } else {
2085     PetscCall(MatCreateDense(PetscObjectComm((PetscObject)A), m, PETSC_DECIDE, M, N, NULL, &B));
2086   }
2087   PetscCall(MatAXPY_Dense_Nest(B, 1.0, A));
2088   if (reuse == MAT_INPLACE_MATRIX) {
2089     PetscCall(MatHeaderReplace(A, &B));
2090   } else if (reuse == MAT_INITIAL_MATRIX) *newmat = B;
2091   PetscFunctionReturn(0);
2092 }
2093 
2094 PetscErrorCode MatHasOperation_Nest(Mat mat, MatOperation op, PetscBool *has) {
2095   Mat_Nest    *bA = (Mat_Nest *)mat->data;
2096   MatOperation opAdd;
2097   PetscInt     i, j, nr = bA->nr, nc = bA->nc;
2098   PetscBool    flg;
2099   PetscFunctionBegin;
2100 
2101   *has = PETSC_FALSE;
2102   if (op == MATOP_MULT || op == MATOP_MULT_ADD || op == MATOP_MULT_TRANSPOSE || op == MATOP_MULT_TRANSPOSE_ADD) {
2103     opAdd = (op == MATOP_MULT || op == MATOP_MULT_ADD ? MATOP_MULT_ADD : MATOP_MULT_TRANSPOSE_ADD);
2104     for (j = 0; j < nc; j++) {
2105       for (i = 0; i < nr; i++) {
2106         if (!bA->m[i][j]) continue;
2107         PetscCall(MatHasOperation(bA->m[i][j], opAdd, &flg));
2108         if (!flg) PetscFunctionReturn(0);
2109       }
2110     }
2111   }
2112   if (((void **)mat->ops)[op]) *has = PETSC_TRUE;
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 /*MC
2117   MATNEST - MATNEST = "nest" - Matrix type consisting of nested submatrices, each stored separately.
2118 
2119   Level: intermediate
2120 
2121   Notes:
2122   This matrix type permits scalable use of `PCFIELDSPLIT` and avoids the large memory costs of extracting submatrices.
2123   It allows the use of symmetric and block formats for parts of multi-physics simulations.
2124   It is usually used with `DMCOMPOSITE` and `DMCreateMatrix()`
2125 
2126   Each of the submatrices lives on the same MPI communicator as the original nest matrix (though they can have zero
2127   rows/columns on some processes.) Thus this is not meant for cases where the submatrices live on far fewer processes
2128   than the nest matrix.
2129 
2130 .seealso: `MATNEST`, `MatCreate()`, `MatType`, `MatCreateNest()`, `MatNestSetSubMat()`, `MatNestGetSubMat()`,
2131           `VecCreateNest()`, `DMCreateMatrix()`, `DMCOMPOSITE`, `MatNestSetVecType()`, `MatNestGetLocalISs()`,
2132           `MatNestGetISs()`, `MatNestSetSubMats()`, `MatNestGetSubMats()`
2133 M*/
2134 PETSC_EXTERN PetscErrorCode MatCreate_Nest(Mat A) {
2135   Mat_Nest *s;
2136 
2137   PetscFunctionBegin;
2138   PetscCall(PetscNewLog(A, &s));
2139   A->data = (void *)s;
2140 
2141   s->nr            = -1;
2142   s->nc            = -1;
2143   s->m             = NULL;
2144   s->splitassembly = PETSC_FALSE;
2145 
2146   PetscCall(PetscMemzero(A->ops, sizeof(*A->ops)));
2147 
2148   A->ops->mult                  = MatMult_Nest;
2149   A->ops->multadd               = MatMultAdd_Nest;
2150   A->ops->multtranspose         = MatMultTranspose_Nest;
2151   A->ops->multtransposeadd      = MatMultTransposeAdd_Nest;
2152   A->ops->transpose             = MatTranspose_Nest;
2153   A->ops->assemblybegin         = MatAssemblyBegin_Nest;
2154   A->ops->assemblyend           = MatAssemblyEnd_Nest;
2155   A->ops->zeroentries           = MatZeroEntries_Nest;
2156   A->ops->copy                  = MatCopy_Nest;
2157   A->ops->axpy                  = MatAXPY_Nest;
2158   A->ops->duplicate             = MatDuplicate_Nest;
2159   A->ops->createsubmatrix       = MatCreateSubMatrix_Nest;
2160   A->ops->destroy               = MatDestroy_Nest;
2161   A->ops->view                  = MatView_Nest;
2162   A->ops->getvecs               = NULL; /* Use VECNEST by calling MatNestSetVecType(A,VECNEST) */
2163   A->ops->getlocalsubmatrix     = MatGetLocalSubMatrix_Nest;
2164   A->ops->restorelocalsubmatrix = MatRestoreLocalSubMatrix_Nest;
2165   A->ops->getdiagonal           = MatGetDiagonal_Nest;
2166   A->ops->diagonalscale         = MatDiagonalScale_Nest;
2167   A->ops->scale                 = MatScale_Nest;
2168   A->ops->shift                 = MatShift_Nest;
2169   A->ops->diagonalset           = MatDiagonalSet_Nest;
2170   A->ops->setrandom             = MatSetRandom_Nest;
2171   A->ops->hasoperation          = MatHasOperation_Nest;
2172   A->ops->missingdiagonal       = MatMissingDiagonal_Nest;
2173 
2174   A->spptr     = NULL;
2175   A->assembled = PETSC_FALSE;
2176 
2177   /* expose Nest api's */
2178   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMat_C", MatNestGetSubMat_Nest));
2179   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMat_C", MatNestSetSubMat_Nest));
2180   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSubMats_C", MatNestGetSubMats_Nest));
2181   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetSize_C", MatNestGetSize_Nest));
2182   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetISs_C", MatNestGetISs_Nest));
2183   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestGetLocalISs_C", MatNestGetLocalISs_Nest));
2184   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetVecType_C", MatNestSetVecType_Nest));
2185   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatNestSetSubMats_C", MatNestSetSubMats_Nest));
2186   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpiaij_C", MatConvert_Nest_AIJ));
2187   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqaij_C", MatConvert_Nest_AIJ));
2188   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_aij_C", MatConvert_Nest_AIJ));
2189   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_is_C", MatConvert_Nest_IS));
2190   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_mpidense_C", MatConvert_Nest_Dense));
2191   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_nest_seqdense_C", MatConvert_Nest_Dense));
2192   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_seqdense_C", MatProductSetFromOptions_Nest_Dense));
2193   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_mpidense_C", MatProductSetFromOptions_Nest_Dense));
2194   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_nest_dense_C", MatProductSetFromOptions_Nest_Dense));
2195 
2196   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATNEST));
2197   PetscFunctionReturn(0);
2198 }
2199