xref: /honee/src/mat-ceed.c (revision 7ebcd9b01a045eb624b7652e7cfa153f1be03d84)
1 // SPDX-FileCopyrightText: Copyright (c) 2017-2024, HONEE contributors.
2 // SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause
3 /// @file
4 /// MatCEED implementation
5 
6 #include <ceed.h>
7 #include <ceed/backend.h>
8 #include <mat-ceed-impl.h>
9 #include <mat-ceed.h>
10 #include <petsc-ceed-utils.h>
11 #include <petsc-ceed.h>
12 #include <petscdm.h>
13 #include <petscmat.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 
19 PetscClassId  MATCEED_CLASSID;
20 PetscLogEvent MATCEED_MULT, MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE, MATCEED_MULT_TRANSPOSE_CEEDOP, MATCEED_ASSEMBLE_DIAGONAL,
21     MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, MATCEED_SETUP_PBDIAGONAL, MATCEED_SETUP_PBDIAGONAL_CEEDOP, MATCEED_ASSEMBLE_PBDIAGONAL,
22     MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, MATCEED_SETUP_FULL, MATCEED_SETUP_FULL_CEEDOP, MATCEED_ASSEMBLE_FULL, MATCEED_ASSEMBLE_FULL_CEEDOP;
23 
24 /**
25   @brief Register MATCEED log events.
26 
27   Not collective across MPI processes.
28 
29   @return An error code: 0 - success, otherwise - failure
30 **/
31 static PetscErrorCode MatCeedRegisterLogEvents() {
32   static PetscBool registered = PETSC_FALSE;
33 
34   PetscFunctionBeginUser;
35   if (registered) PetscFunctionReturn(PETSC_SUCCESS);
36   PetscCall(PetscClassIdRegister("MatCEED", &MATCEED_CLASSID));
37   PetscCall(PetscLogEventRegister("MatCEEDMul", MATCEED_CLASSID, &MATCEED_MULT));
38   PetscCall(PetscLogEventRegister("MatCEEDMulCeed", MATCEED_CLASSID, &MATCEED_MULT_CEEDOP));
39   PetscCall(PetscLogEventRegister("MatCEEDMulT", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE));
40   PetscCall(PetscLogEventRegister("MatCEEDMulTCeed", MATCEED_CLASSID, &MATCEED_MULT_TRANSPOSE_CEEDOP));
41   PetscCall(PetscLogEventRegister("MatCEEDAsmDiag", MATCEED_CLASSID, &MATCEED_ASSEMBLE_DIAGONAL));
42   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSU", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL));
43   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_PBDIAGONAL_CEEDOP));
44   PetscCall(PetscLogEventRegister("MatCEEDAsmPBD", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL));
45   PetscCall(PetscLogEventRegister("MatCEEDAsmPBDCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP));
46   PetscCall(PetscLogEventRegister("MatCEEDAsmSU", MATCEED_CLASSID, &MATCEED_SETUP_FULL));
47   PetscCall(PetscLogEventRegister("MatCEEDAsmSUCeed", MATCEED_CLASSID, &MATCEED_SETUP_FULL_CEEDOP));
48   PetscCall(PetscLogEventRegister("MatCEEDAsm", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL));
49   PetscCall(PetscLogEventRegister("MatCEEDAsmCeed", MATCEED_CLASSID, &MATCEED_ASSEMBLE_FULL_CEEDOP));
50   registered = PETSC_TRUE;
51   PetscFunctionReturn(PETSC_SUCCESS);
52 }
53 
54 /**
55   @brief Assemble the point block diagonal of a `MATCEED` into a `MATAIJ` or similar.
56          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
57          The caller is responsible for ensuring the global and local sizes are compatible; otherwise this function will fail.
58 
59   Collective across MPI processes.
60 
61   @param[in]      mat_ceed  `MATCEED` to assemble
62   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
63 
64   @return An error code: 0 - success, otherwise - failure
65 **/
66 static PetscErrorCode MatCeedAssemblePointBlockDiagonalCOO(Mat mat_ceed, Mat mat_coo) {
67   MatCeedContext ctx;
68 
69   PetscFunctionBeginUser;
70   PetscCall(MatShellGetContext(mat_ceed, &ctx));
71 
72   // Check if COO pattern set
73   {
74     PetscInt index = -1;
75 
76     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
77       if (ctx->mats_assembled_pbd[i] == mat_coo) index = i;
78     }
79     if (index == -1) {
80       PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
81       CeedInt      *rows_ceed, *cols_ceed;
82       PetscCount    num_entries;
83       PetscLogStage stage_amg_setup;
84 
85       // -- Assemble sparsity pattern if mat hasn't been assembled before
86       PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
87       if (stage_amg_setup == -1) {
88         PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
89       }
90       PetscCall(PetscLogStagePush(stage_amg_setup));
91       PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
92       PetscCall(PetscLogEventBegin(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
93       PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonalSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
94       PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
95       PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
96       PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
97       PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
98       free(rows_petsc);
99       free(cols_petsc);
100       if (!ctx->coo_values_pbd) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_pbd));
101       PetscCall(PetscRealloc(++ctx->num_mats_assembled_pbd * sizeof(Mat), &ctx->mats_assembled_pbd));
102       ctx->mats_assembled_pbd[ctx->num_mats_assembled_pbd - 1] = mat_coo;
103       PetscCall(PetscLogEventEnd(MATCEED_SETUP_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
104       PetscCall(PetscLogStagePop());
105     }
106   }
107 
108   // Assemble mat_ceed
109   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
110   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
111   {
112     const CeedScalar *values;
113     MatType           mat_type;
114     CeedMemType       mem_type = CEED_MEM_HOST;
115     PetscBool         is_spd, is_spd_known;
116 
117     PetscCall(MatGetType(mat_coo, &mat_type));
118     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
119     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
120     else mem_type = CEED_MEM_HOST;
121 
122     PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
123     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemblePointBlockDiagonal(ctx->op_mult, ctx->coo_values_pbd, CEED_REQUEST_IMMEDIATE));
124     PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
125     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_pbd, mem_type, &values));
126     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
127     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
128     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
129     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_pbd, &values));
130   }
131   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
132   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_PBDIAGONAL, mat_ceed, mat_coo, NULL, NULL));
133   PetscFunctionReturn(PETSC_SUCCESS);
134 }
135 
136 /**
137   @brief Assemble inner `Mat` for diagonal `PC` operations
138 
139   Collective across MPI processes.
140 
141   @param[in]   mat_ceed      `MATCEED` to assemble from
142   @param[in]   use_ceed_pbd  Boolean flag to use libCEED PBD assembly
143   @param[out]  mat_inner     Inner `Mat` for diagonal operations
144 
145   @return An error code: 0 - success, otherwise - failure
146 **/
147 static PetscErrorCode MatCeedAssembleInnerBlockDiagonalMat(Mat mat_ceed, PetscBool use_ceed_pbd, Mat *mat_inner) {
148   MatCeedContext ctx;
149 
150   PetscFunctionBeginUser;
151   PetscCall(MatShellGetContext(mat_ceed, &ctx));
152   if (use_ceed_pbd) {
153     // Check if COO pattern set
154     if (!ctx->mat_assembled_pbd_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_pbd_internal));
155 
156     // Assemble mat_assembled_pbd_internal
157     PetscCall(MatCeedAssemblePointBlockDiagonalCOO(mat_ceed, ctx->mat_assembled_pbd_internal));
158     if (mat_inner) *mat_inner = ctx->mat_assembled_pbd_internal;
159   } else {
160     // Check if COO pattern set
161     if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
162 
163     // Assemble mat_assembled_full_internal
164     PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
165     if (mat_inner) *mat_inner = ctx->mat_assembled_full_internal;
166   }
167   PetscFunctionReturn(PETSC_SUCCESS);
168 }
169 
170 /**
171   @brief Get on-process diagonal block of `MATCEED`
172 
173   This is the per-process diagonal block of the global matrix.
174   It is NOT the set of diagonal blocks associated with the block size of the matrix (i.e. `MatSetBlockSize()` has no effect on this function).
175 
176   Collective across MPI processes.
177 
178   @param[in]   mat_ceed   `MATCEED` to take the diagonal block of
179   @param[out]  mat_block  The diagonal block matrix
180 
181   @return An error code: 0 - success, otherwise - failure
182 **/
183 static PetscErrorCode MatGetDiagonalBlock_Ceed(Mat mat_ceed, Mat *mat_block) {
184   MatCeedContext ctx;
185 
186   PetscFunctionBeginUser;
187   PetscCall(MatShellGetContext(mat_ceed, &ctx));
188 
189   // Check if COO pattern set
190   if (!ctx->mat_assembled_full_internal) PetscCall(MatCeedCreateMatCOO(mat_ceed, &ctx->mat_assembled_full_internal));
191 
192   // Assemble mat_assembled_full_internal
193   PetscCall(MatCeedAssembleCOO(mat_ceed, ctx->mat_assembled_full_internal));
194 
195   // Get diagonal block
196   PetscCall(MatGetDiagonalBlock(ctx->mat_assembled_full_internal, mat_block));
197   PetscFunctionReturn(PETSC_SUCCESS);
198 }
199 
200 /**
201   @brief Invert `MATCEED` diagonal block for Jacobi.
202 
203   Collective across MPI processes.
204 
205   @param[in]   mat_ceed  `MATCEED` to invert
206   @param[out]  values    The block inverses in column major order
207 
208   @return An error code: 0 - success, otherwise - failure
209 **/
210 static PetscErrorCode MatInvertBlockDiagonal_Ceed(Mat mat_ceed, const PetscScalar **values) {
211   Mat            mat_inner = NULL;
212   MatCeedContext ctx;
213 
214   PetscFunctionBeginUser;
215   PetscCall(MatShellGetContext(mat_ceed, &ctx));
216 
217   // Assemble inner mat if needed
218   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_pbd_valid, &mat_inner));
219 
220   // Invert PB diagonal
221   PetscCall(MatInvertBlockDiagonal(mat_inner, values));
222   PetscFunctionReturn(PETSC_SUCCESS);
223 }
224 
225 /**
226   @brief Invert `MATCEED` variable diagonal block for Jacobi.
227 
228   Collective across MPI processes.
229 
230   @param[in]   mat_ceed     `MATCEED` to invert
231   @param[in]   num_blocks   The number of blocks on the process
232   @param[in]   block_sizes  The size of each block on the process
233   @param[out]  values       The block inverses in column major order
234 
235   @return An error code: 0 - success, otherwise - failure
236 **/
237 static PetscErrorCode MatInvertVariableBlockDiagonal_Ceed(Mat mat_ceed, PetscInt num_blocks, const PetscInt *block_sizes, PetscScalar *values) {
238   Mat            mat_inner = NULL;
239   MatCeedContext ctx;
240 
241   PetscFunctionBeginUser;
242   PetscCall(MatShellGetContext(mat_ceed, &ctx));
243 
244   // Assemble inner mat if needed
245   PetscCall(MatCeedAssembleInnerBlockDiagonalMat(mat_ceed, ctx->is_ceed_vpbd_valid, &mat_inner));
246 
247   // Invert PB diagonal
248   PetscCall(MatInvertVariableBlockDiagonal(mat_inner, num_blocks, block_sizes, values));
249   PetscFunctionReturn(PETSC_SUCCESS);
250 }
251 
252 /**
253   @brief View `MATCEED`.
254 
255   Collective across MPI processes.
256 
257   @param[in]   mat_ceed  `MATCEED` to view
258   @param[in]   viewer    The visualization context
259 
260   @return An error code: 0 - success, otherwise - failure
261 **/
262 static PetscErrorCode MatView_Ceed(Mat mat_ceed, PetscViewer viewer) {
263   PetscBool         is_ascii;
264   PetscViewerFormat format;
265   PetscMPIInt       size, rank;
266   MatCeedContext    ctx;
267 
268   PetscFunctionBeginUser;
269   PetscCall(MatShellGetContext(mat_ceed, &ctx));
270   if (!viewer) PetscCall(PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)mat_ceed), &viewer));
271   PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
272 
273   PetscCall(PetscViewerGetFormat(viewer, &format));
274   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat_ceed), &size));
275   if (size == 1 && format == PETSC_VIEWER_LOAD_BALANCE) PetscFunctionReturn(PETSC_SUCCESS);
276 
277   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat_ceed), &rank));
278   if (rank != 0) PetscFunctionReturn(PETSC_SUCCESS);
279 
280   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &is_ascii));
281   {
282     PetscBool is_detailed     = format == PETSC_VIEWER_ASCII_INFO_DETAIL;
283     char      rank_string[16] = {'\0'};
284     FILE     *file;
285 
286     PetscCall(PetscViewerASCIIPrintf(viewer, "MatCEED:\n"));
287     PetscCall(PetscViewerASCIIPushTab(viewer));  // MatCEED
288     PetscCall(PetscViewerASCIIPrintf(viewer, "Default COO MatType: %s\n", ctx->coo_mat_type));
289     PetscCall(PetscSNPrintf(rank_string, 16, "on Rank %d", rank));
290     PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator Apply %s:\n", is_detailed ? rank_string : "Summary"));
291     PetscCall(PetscViewerASCIIPrintf(viewer, "PB Diagonal Assembly Valid: %s\n", ctx->is_ceed_pbd_valid ? "True" : "False"));
292     PetscCall(PetscViewerASCIIPrintf(viewer, "VPB Diagonal Assembly Valid: %s\n", ctx->is_ceed_vpbd_valid ? "True" : "False"));
293     PetscCall(PetscViewerASCIIGetPointer(viewer, &file));
294     PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
295     if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult, file));
296     else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult, file));
297     PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
298     if (ctx->op_mult_transpose) {
299       PetscCall(PetscViewerASCIIPrintf(viewer, "CeedOperator ApplyTranspose %s:\n", is_detailed ? rank_string : "Summary"));
300       PetscCall(PetscViewerASCIIPushTab(viewer));  // CeedOperator
301       if (is_detailed) PetscCallCeed(ctx->ceed, CeedOperatorView(ctx->op_mult_transpose, file));
302       else PetscCallCeed(ctx->ceed, CeedOperatorViewTerse(ctx->op_mult_transpose, file));
303       PetscCall(PetscViewerASCIIPopTab(viewer));  // CeedOperator
304     }
305     PetscCall(PetscViewerASCIIPopTab(viewer));  // MatCEED
306   }
307   PetscFunctionReturn(PETSC_SUCCESS);
308 }
309 
310 // -----------------------------------------------------------------------------
311 // MatCeed
312 // -----------------------------------------------------------------------------
313 
314 /**
315   @brief Create PETSc `Mat` from libCEED operators.
316 
317   Collective across MPI processes.
318 
319   @param[in]   dm_x                      Input `DM`
320   @param[in]   dm_y                      Output `DM`, or NULL if the same as `dm_x`
321   @param[in]   op_mult                   `CeedOperator` for forward evaluation
322   @param[in]   op_mult_transpose         `CeedOperator` for transpose evaluation, or NULL
323   @param[out]  mat                       New `MATCEED`
324 
325   @return An error code: 0 - success, otherwise - failure
326 **/
327 PetscErrorCode MatCreateCeed(DM dm_x, DM dm_y, CeedOperator op_mult, CeedOperator op_mult_transpose, Mat *mat) {
328   PetscInt       X_l_size, X_g_size, Y_l_size, Y_g_size;
329   VecType        vec_type;
330   MatCeedContext ctx;
331 
332   PetscFunctionBeginUser;
333   PetscCall(MatCeedRegisterLogEvents());
334 
335   // Collect context data
336   PetscCall(DMGetVecType(dm_x, &vec_type));
337   {
338     Vec X;
339 
340     PetscCall(DMGetGlobalVector(dm_x, &X));
341     PetscCall(VecGetSize(X, &X_g_size));
342     PetscCall(VecGetLocalSize(X, &X_l_size));
343     PetscCall(DMRestoreGlobalVector(dm_x, &X));
344   }
345   if (dm_y) {
346     Vec Y;
347 
348     PetscCall(DMGetGlobalVector(dm_y, &Y));
349     PetscCall(VecGetSize(Y, &Y_g_size));
350     PetscCall(VecGetLocalSize(Y, &Y_l_size));
351     PetscCall(DMRestoreGlobalVector(dm_y, &Y));
352   } else {
353     dm_y     = dm_x;
354     Y_g_size = X_g_size;
355     Y_l_size = X_l_size;
356   }
357 
358   // Create context
359   {
360     Vec X_loc, Y_loc_transpose = NULL;
361 
362     PetscCall(DMCreateLocalVector(dm_x, &X_loc));
363     PetscCall(VecZeroEntries(X_loc));
364     if (op_mult_transpose) {
365       PetscCall(DMCreateLocalVector(dm_y, &Y_loc_transpose));
366       PetscCall(VecZeroEntries(Y_loc_transpose));
367     }
368     PetscCall(MatCeedContextCreate(dm_x, dm_y, X_loc, Y_loc_transpose, op_mult, op_mult_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
369                                    MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
370     PetscCall(VecDestroy(&X_loc));
371     PetscCall(VecDestroy(&Y_loc_transpose));
372   }
373 
374   // Create mat
375   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)dm_x), Y_l_size, X_l_size, Y_g_size, X_g_size, ctx, mat));
376   PetscCall(PetscObjectChangeTypeName((PetscObject)*mat, MATCEED));
377   // -- Set block and variable block sizes
378   if (dm_x == dm_y) {
379     MatType dm_mat_type, dm_mat_type_copy;
380     Mat     temp_mat;
381 
382     PetscCall(DMGetMatType(dm_x, &dm_mat_type));
383     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
384     PetscCall(DMSetMatType(dm_x, MATAIJ));
385     PetscCall(DMCreateMatrix(dm_x, &temp_mat));
386     PetscCall(DMSetMatType(dm_x, dm_mat_type_copy));
387     PetscCall(PetscFree(dm_mat_type_copy));
388 
389     {
390       PetscInt        block_size, num_blocks, max_vblock_size = PETSC_INT_MAX;
391       const PetscInt *vblock_sizes;
392 
393       // -- Get block sizes
394       PetscCall(MatGetBlockSize(temp_mat, &block_size));
395       PetscCall(MatGetVariableBlockSizes(temp_mat, &num_blocks, &vblock_sizes));
396       {
397         PetscInt local_min_max[2] = {0}, global_min_max[2] = {0, PETSC_INT_MAX};
398 
399         for (PetscInt i = 0; i < num_blocks; i++) local_min_max[1] = PetscMax(local_min_max[1], vblock_sizes[i]);
400         PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_min_max, global_min_max));
401         max_vblock_size = global_min_max[1];
402       }
403 
404       // -- Copy block sizes
405       if (block_size > 1) PetscCall(MatSetBlockSize(*mat, block_size));
406       if (num_blocks) PetscCall(MatSetVariableBlockSizes(*mat, num_blocks, (PetscInt *)vblock_sizes));
407 
408       // -- Check libCEED compatibility
409       {
410         bool is_composite;
411 
412         ctx->is_ceed_pbd_valid  = PETSC_TRUE;
413         ctx->is_ceed_vpbd_valid = PETSC_TRUE;
414         PetscCallCeed(ctx->ceed, CeedOperatorIsComposite(op_mult, &is_composite));
415         if (is_composite) {
416           CeedInt       num_sub_operators;
417           CeedOperator *sub_operators;
418 
419           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetNumSub(op_mult, &num_sub_operators));
420           PetscCallCeed(ctx->ceed, CeedCompositeOperatorGetSubList(op_mult, &sub_operators));
421           for (CeedInt i = 0; i < num_sub_operators; i++) {
422             CeedInt                  num_bases, num_comp;
423             CeedBasis               *active_bases;
424             CeedOperatorAssemblyData assembly_data;
425 
426             PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(sub_operators[i], &assembly_data));
427             PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
428             PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
429             if (num_bases > 1) {
430               ctx->is_ceed_pbd_valid  = PETSC_FALSE;
431               ctx->is_ceed_vpbd_valid = PETSC_FALSE;
432             }
433             if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
434             if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
435           }
436         } else {
437           // LCOV_EXCL_START
438           CeedInt                  num_bases, num_comp;
439           CeedBasis               *active_bases;
440           CeedOperatorAssemblyData assembly_data;
441 
442           PetscCallCeed(ctx->ceed, CeedOperatorGetOperatorAssemblyData(op_mult, &assembly_data));
443           PetscCallCeed(ctx->ceed, CeedOperatorAssemblyDataGetBases(assembly_data, &num_bases, &active_bases, NULL, NULL, NULL, NULL));
444           PetscCallCeed(ctx->ceed, CeedBasisGetNumComponents(active_bases[0], &num_comp));
445           if (num_bases > 1) {
446             ctx->is_ceed_pbd_valid  = PETSC_FALSE;
447             ctx->is_ceed_vpbd_valid = PETSC_FALSE;
448           }
449           if (num_comp != block_size) ctx->is_ceed_pbd_valid = PETSC_FALSE;
450           if (num_comp < max_vblock_size) ctx->is_ceed_vpbd_valid = PETSC_FALSE;
451           // LCOV_EXCL_STOP
452         }
453         {
454           PetscInt local_is_valid[2], global_is_valid[2];
455 
456           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_pbd_valid;
457           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
458           ctx->is_ceed_pbd_valid = global_is_valid[0];
459           local_is_valid[0] = local_is_valid[1] = ctx->is_ceed_vpbd_valid;
460           PetscCall(PetscGlobalMinMaxInt(PetscObjectComm((PetscObject)dm_x), local_is_valid, global_is_valid));
461           ctx->is_ceed_vpbd_valid = global_is_valid[0];
462         }
463       }
464     }
465     PetscCall(MatDestroy(&temp_mat));
466   }
467   // -- Set internal mat type
468   {
469     VecType vec_type;
470     MatType coo_mat_type;
471 
472     PetscCall(VecGetType(ctx->X_loc, &vec_type));
473     if (strstr(vec_type, VECCUDA)) coo_mat_type = MATAIJCUSPARSE;
474     else if (strstr(vec_type, VECKOKKOS)) coo_mat_type = MATAIJKOKKOS;
475     else coo_mat_type = MATAIJ;
476     PetscCall(PetscStrallocpy(coo_mat_type, &ctx->coo_mat_type));
477   }
478   // -- Set mat operations
479   PetscCall(MatShellSetContextDestroy(*mat, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
480   PetscCall(MatShellSetOperation(*mat, MATOP_VIEW, (void (*)(void))MatView_Ceed));
481   PetscCall(MatShellSetOperation(*mat, MATOP_MULT, (void (*)(void))MatMult_Ceed));
482   if (op_mult_transpose) PetscCall(MatShellSetOperation(*mat, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
483   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
484   PetscCall(MatShellSetOperation(*mat, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
485   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
486   PetscCall(MatShellSetOperation(*mat, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
487   PetscCall(MatShellSetVecType(*mat, vec_type));
488   PetscFunctionReturn(PETSC_SUCCESS);
489 }
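
// A minimal caller-side sketch (hypothetical usage, not part of this file): create a MATCEED from a DM and a
// CeedOperator built elsewhere (`dm` and `op_apply` are assumed), apply it, and destroy it.
//
//   Mat J;
//   Vec X, Y;
//
//   PetscCall(MatCreateCeed(dm, NULL, op_apply, NULL, &J));  // dm_y == NULL: output DM same as input DM
//   PetscCall(DMCreateGlobalVector(dm, &X));
//   PetscCall(VecDuplicate(X, &Y));
//   PetscCall(MatMult(J, X, Y));                             // forward evaluation via the CeedOperator
//   PetscCall(VecDestroy(&X));
//   PetscCall(VecDestroy(&Y));
//   PetscCall(MatDestroy(&J));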
490 
491 /**
492   @brief Copy a `MATCEED` into a compatible `Mat` with type `MATSHELL` or `MATCEED`.
493 
494   Collective across MPI processes.
495 
496   @param[in]   mat_ceed   `MATCEED` to copy from
497   @param[out]  mat_other  `MATSHELL` or `MATCEED` to copy into
498 
499   @return An error code: 0 - success, otherwise - failure
500 **/
501 PetscErrorCode MatCeedCopy(Mat mat_ceed, Mat mat_other) {
502   PetscFunctionBeginUser;
503   PetscCall(MatCeedRegisterLogEvents());
504 
505   // Check type compatibility
506   {
507     PetscBool is_matceed = PETSC_FALSE, is_matshell = PETSC_FALSE;
508     MatType   mat_type_ceed, mat_type_other;
509 
510     PetscCall(MatGetType(mat_ceed, &mat_type_ceed));
511     PetscCall(PetscStrcmp(mat_type_ceed, MATCEED, &is_matceed));
512     PetscCheck(is_matceed, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_ceed must have type " MATCEED);
513     PetscCall(MatGetType(mat_other, &mat_type_other));
514     PetscCall(PetscStrcmp(mat_type_other, MATCEED, &is_matceed));
515     PetscCall(PetscStrcmp(mat_type_other, MATSHELL, &is_matshell));
516     PetscCheck(is_matceed || is_matshell, PETSC_COMM_SELF, PETSC_ERR_LIB, "mat_other must have type " MATCEED " or " MATSHELL);
517   }
518 
519   // Check dimension compatibility
520   {
521     PetscInt X_l_ceed_size, X_g_ceed_size, Y_l_ceed_size, Y_g_ceed_size, X_l_other_size, X_g_other_size, Y_l_other_size, Y_g_other_size;
522 
523     PetscCall(MatGetSize(mat_ceed, &Y_g_ceed_size, &X_g_ceed_size));
524     PetscCall(MatGetLocalSize(mat_ceed, &Y_l_ceed_size, &X_l_ceed_size));
525     PetscCall(MatGetSize(mat_other, &Y_g_other_size, &X_g_other_size));
526     PetscCall(MatGetLocalSize(mat_other, &Y_l_other_size, &X_l_other_size));
527     PetscCheck((Y_g_ceed_size == Y_g_other_size) && (X_g_ceed_size == X_g_other_size) && (Y_l_ceed_size == Y_l_other_size) &&
528                    (X_l_ceed_size == X_l_other_size),
529                PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ,
530                "mat_ceed and mat_other must have compatible sizes; found mat_ceed (Global: %" PetscInt_FMT ", %" PetscInt_FMT
531                "; Local: %" PetscInt_FMT ", %" PetscInt_FMT ") mat_other (Global: %" PetscInt_FMT ", %" PetscInt_FMT "; Local: %" PetscInt_FMT
532                ", %" PetscInt_FMT ")",
533                Y_g_ceed_size, X_g_ceed_size, Y_l_ceed_size, X_l_ceed_size, Y_g_other_size, X_g_other_size, Y_l_other_size, X_l_other_size);
534   }
535 
536   // Convert
537   {
538     VecType        vec_type;
539     MatCeedContext ctx;
540 
541     PetscCall(PetscObjectChangeTypeName((PetscObject)mat_other, MATCEED));
542     PetscCall(MatShellGetContext(mat_ceed, &ctx));
543     PetscCall(MatCeedContextReference(ctx));
544     PetscCall(MatShellSetContext(mat_other, ctx));
545     PetscCall(MatShellSetContextDestroy(mat_other, (PetscErrorCode(*)(void *))MatCeedContextDestroy));
546     PetscCall(MatShellSetOperation(mat_other, MATOP_VIEW, (void (*)(void))MatView_Ceed));
547     PetscCall(MatShellSetOperation(mat_other, MATOP_MULT, (void (*)(void))MatMult_Ceed));
548     if (ctx->op_mult_transpose) PetscCall(MatShellSetOperation(mat_other, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));
549     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL, (void (*)(void))MatGetDiagonal_Ceed));
550     PetscCall(MatShellSetOperation(mat_other, MATOP_GET_DIAGONAL_BLOCK, (void (*)(void))MatGetDiagonalBlock_Ceed));
551     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_BLOCK_DIAGONAL, (void (*)(void))MatInvertBlockDiagonal_Ceed));
552     PetscCall(MatShellSetOperation(mat_other, MATOP_INVERT_VBLOCK_DIAGONAL, (void (*)(void))MatInvertVariableBlockDiagonal_Ceed));
553     {
554       PetscInt block_size;
555 
556       PetscCall(MatGetBlockSize(mat_ceed, &block_size));
557       if (block_size > 1) PetscCall(MatSetBlockSize(mat_other, block_size));
558     }
559     {
560       PetscInt        num_blocks;
561       const PetscInt *block_sizes;
562 
563       PetscCall(MatGetVariableBlockSizes(mat_ceed, &num_blocks, &block_sizes));
564       if (num_blocks) PetscCall(MatSetVariableBlockSizes(mat_other, num_blocks, (PetscInt *)block_sizes));
565     }
566     PetscCall(DMGetVecType(ctx->dm_x, &vec_type));
567     PetscCall(MatShellSetVecType(mat_other, vec_type));
568   }
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
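
// A minimal sketch (hypothetical usage): copy a MATCEED onto an existing MATSHELL of matching sizes.
// `J_ceed` is assumed to be a MATCEED created with MatCreateCeed() above.
//
//   Mat      J_shell;
//   PetscInt m, n, M, N;
//
//   PetscCall(MatGetLocalSize(J_ceed, &m, &n));
//   PetscCall(MatGetSize(J_ceed, &M, &N));
//   PetscCall(MatCreateShell(PetscObjectComm((PetscObject)J_ceed), m, n, M, N, NULL, &J_shell));
//   PetscCall(MatCeedCopy(J_ceed, J_shell));  // J_shell now shares the MatCeedContext and shell operations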
571 
572 /**
573   @brief Mark the `CeedQFunction` context data as updated, requiring re-assembly of the `CeedOperator` assembly data for a `MATCEED`.
574 
575   Collective across MPI processes.
576 
577   @param[in]   mat_ceed       `MATCEED`
578   @param[in]   update_needed  Boolean flag indicating the `CeedQFunction` data needs to be updated
579 
580   @return An error code: 0 - success, otherwise - failure
581 **/
582 PetscErrorCode MatCeedSetAssemblyDataUpdateNeeded(Mat mat_ceed, PetscBool update_needed) {
583   MatCeedContext ctx;
584 
585   PetscFunctionBeginUser;
586   PetscCall(MatShellGetContext(mat_ceed, &ctx));
587   PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult, update_needed));
588   if (ctx->op_mult_transpose) {
589     PetscCallCeed(ctx->ceed, CeedOperatorSetQFunctionAssemblyDataUpdateNeeded(ctx->op_mult_transpose, update_needed));
590   }
591   if (update_needed) {
592     PetscCall(MatAssemblyBegin(mat_ceed, MAT_FINAL_ASSEMBLY));
593     PetscCall(MatAssemblyEnd(mat_ceed, MAT_FINAL_ASSEMBLY));
594   }
595   PetscFunctionReturn(PETSC_SUCCESS);
596 }
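
// A minimal sketch (hypothetical usage): after changing CeedQFunction context data outside of the MatCeed interface
// (e.g. directly on the CeedOperator), flag the assembly data as stale so the next COO assembly recomputes it.
//
//   PetscCall(MatCeedSetAssemblyDataUpdateNeeded(J_ceed, PETSC_TRUE));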
597 
598 /**
599   @brief Set up a `Mat` with the same COO pattern as a `MATCEED`.
600 
601   Collective across MPI processes.
602 
603   @param[in]   mat_ceed  `MATCEED`
604   @param[out]  mat_coo   Sparse `Mat` with same COO pattern
605 
606   @return An error code: 0 - success, otherwise - failure
607 **/
608 PetscErrorCode MatCeedCreateMatCOO(Mat mat_ceed, Mat *mat_coo) {
609   MatCeedContext ctx;
610 
611   PetscFunctionBeginUser;
612   PetscCall(MatShellGetContext(mat_ceed, &ctx));
613 
614   PetscCheck(ctx->dm_x == ctx->dm_y, PetscObjectComm((PetscObject)mat_ceed), PETSC_ERR_SUP, "COO assembly only supported for MATCEED on a single DM");
615 
616   // Check command line COO mat type
617   {
618     PetscBool is_coo_mat_type_cl = PETSC_FALSE;
619     char      coo_mat_type_cl[64];
620 
621     // Check for specific CL coo mat type for this Mat
622     {
623       const char *mat_ceed_prefix = NULL;
624 
625       PetscCall(MatGetOptionsPrefix(mat_ceed, &mat_ceed_prefix));
626       PetscOptionsBegin(PetscObjectComm((PetscObject)mat_ceed), mat_ceed_prefix, "", NULL);
627       PetscCall(PetscOptionsFList("-ceed_coo_mat_type", "Default MATCEED COO assembly MatType", NULL, MatList, coo_mat_type_cl, coo_mat_type_cl,
628                                   sizeof(coo_mat_type_cl), &is_coo_mat_type_cl));
629       PetscOptionsEnd();
630       if (is_coo_mat_type_cl) {
631         PetscCall(PetscFree(ctx->coo_mat_type));
632         PetscCall(PetscStrallocpy(coo_mat_type_cl, &ctx->coo_mat_type));
633       }
634     }
635   }
636 
637   // Create sparse matrix
638   {
639     MatType dm_mat_type, dm_mat_type_copy;
640 
641     PetscCall(DMGetMatType(ctx->dm_x, &dm_mat_type));
642     PetscCall(PetscStrallocpy(dm_mat_type, (char **)&dm_mat_type_copy));
643     PetscCall(DMSetMatType(ctx->dm_x, ctx->coo_mat_type));
644     PetscCall(DMCreateMatrix(ctx->dm_x, mat_coo));
645     PetscCall(DMSetMatType(ctx->dm_x, dm_mat_type_copy));
646     PetscCall(PetscFree(dm_mat_type_copy));
647   }
648   PetscFunctionReturn(PETSC_SUCCESS);
649 }
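
// A minimal sketch (hypothetical usage): create a sparse matrix compatible with the MATCEED COO assembly.
// The MatType defaults to the MATCEED COO MatType and may be overridden on the command line with -ceed_coo_mat_type.
//
//   Mat J_sparse;
//
//   PetscCall(MatCeedCreateMatCOO(J_ceed, &J_sparse));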
650 
651 /**
652   @brief Set up the COO preallocation for assembling a `MATCEED` into a `MATAIJ` or similar.
653          The caller is responsible for ensuring the global and local sizes are compatible; otherwise this function will fail.
654 
655   Collective across MPI processes.
656 
657   @param[in]      mat_ceed  `MATCEED` to assemble
658   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
659 
660   @return An error code: 0 - success, otherwise - failure
661 **/
662 PetscErrorCode MatCeedSetPreallocationCOO(Mat mat_ceed, Mat mat_coo) {
663   MatCeedContext ctx;
664 
665   PetscFunctionBeginUser;
666   PetscCall(MatShellGetContext(mat_ceed, &ctx));
667 
668   {
669     PetscInt     *rows_petsc = NULL, *cols_petsc = NULL;
670     CeedInt      *rows_ceed, *cols_ceed;
671     PetscCount    num_entries;
672     PetscLogStage stage_amg_setup;
673 
674     // -- Assemble sparsity pattern if mat hasn't been assembled before
675     PetscCall(PetscLogStageGetId("MatCEED Asm Setup", &stage_amg_setup));
676     if (stage_amg_setup == -1) {
677       PetscCall(PetscLogStageRegister("MatCEED Asm Setup", &stage_amg_setup));
678     }
679     PetscCall(PetscLogStagePush(stage_amg_setup));
680     PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
681     PetscCall(PetscLogEventBegin(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
682     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleSymbolic(ctx->op_mult, &num_entries, &rows_ceed, &cols_ceed));
683     PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
684     PetscCall(IntArrayCeedToPetsc(num_entries, &rows_ceed, &rows_petsc));
685     PetscCall(IntArrayCeedToPetsc(num_entries, &cols_ceed, &cols_petsc));
686     PetscCall(MatSetPreallocationCOOLocal(mat_coo, num_entries, rows_petsc, cols_petsc));
687     free(rows_petsc);
688     free(cols_petsc);
689     if (!ctx->coo_values_full) PetscCallCeed(ctx->ceed, CeedVectorCreate(ctx->ceed, num_entries, &ctx->coo_values_full));
690     PetscCall(PetscRealloc(++ctx->num_mats_assembled_full * sizeof(Mat), &ctx->mats_assembled_full));
691     ctx->mats_assembled_full[ctx->num_mats_assembled_full - 1] = mat_coo;
692     PetscCall(PetscLogEventEnd(MATCEED_SETUP_FULL, mat_ceed, mat_coo, NULL, NULL));
693     PetscCall(PetscLogStagePop());
694   }
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 /**
699   @brief Assemble a `MATCEED` into a `MATAIJ` or similar.
700          The `mat_coo` preallocation is set to match the sparsity pattern of `mat_ceed`.
701          The caller is responsible for ensuring the global and local sizes are compatible; otherwise this function will fail.
702 
703   Collective across MPI processes.
704 
705   @param[in]      mat_ceed  `MATCEED` to assemble
706   @param[in,out]  mat_coo   `MATAIJ` or similar to assemble into
707 
708   @return An error code: 0 - success, otherwise - failure
709 **/
710 PetscErrorCode MatCeedAssembleCOO(Mat mat_ceed, Mat mat_coo) {
711   MatCeedContext ctx;
712 
713   PetscFunctionBeginUser;
714   PetscCall(MatShellGetContext(mat_ceed, &ctx));
715 
716   // Set COO pattern if needed
717   {
718     CeedInt index = -1;
719 
720     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
721       if (ctx->mats_assembled_full[i] == mat_coo) index = i;
722     }
723     if (index == -1) PetscCall(MatCeedSetPreallocationCOO(mat_ceed, mat_coo));
724   }
725 
726   // Assemble mat_ceed
727   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
728   PetscCall(MatAssemblyBegin(mat_coo, MAT_FINAL_ASSEMBLY));
729   {
730     const CeedScalar *values;
731     MatType           mat_type;
732     CeedMemType       mem_type = CEED_MEM_HOST;
733     PetscBool         is_spd, is_spd_known;
734 
735     PetscCall(MatGetType(mat_coo, &mat_type));
736     if (strstr(mat_type, "cusparse")) mem_type = CEED_MEM_DEVICE;
737     else if (strstr(mat_type, "kokkos")) mem_type = CEED_MEM_DEVICE;
738     else mem_type = CEED_MEM_HOST;
739 
740     PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
741     PetscCallCeed(ctx->ceed, CeedOperatorLinearAssemble(ctx->op_mult, ctx->coo_values_full));
742     PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL_CEEDOP, mat_ceed, mat_coo, NULL, NULL));
743     PetscCallCeed(ctx->ceed, CeedVectorGetArrayRead(ctx->coo_values_full, mem_type, &values));
744     PetscCall(MatSetValuesCOO(mat_coo, values, INSERT_VALUES));
745     PetscCall(MatIsSPDKnown(mat_ceed, &is_spd_known, &is_spd));
746     if (is_spd_known) PetscCall(MatSetOption(mat_coo, MAT_SPD, is_spd));
747     PetscCallCeed(ctx->ceed, CeedVectorRestoreArrayRead(ctx->coo_values_full, &values));
748   }
749   PetscCall(MatAssemblyEnd(mat_coo, MAT_FINAL_ASSEMBLY));
750   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_FULL, mat_ceed, mat_coo, NULL, NULL));
751   PetscFunctionReturn(PETSC_SUCCESS);
752 }
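
// A minimal sketch (hypothetical usage): assemble the MATCEED into a sparse matrix created with MatCeedCreateMatCOO()
// (here `J_sparse`), e.g. to provide an explicitly assembled preconditioning matrix.
// The COO preallocation is set on the first assembly of a given matrix and reused afterwards.
//
//   PetscCall(MatCeedAssembleCOO(J_ceed, J_sparse));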
753 
754 /**
755   @brief Set the current value of a context field for a `MatCEED`.
756 
757   Not collective across MPI processes.
758 
759   @param[in,out]  mat    `MatCEED`
760   @param[in]      name   Name of the context field
761   @param[in]      value  New context field value
762 
763   @return An error code: 0 - success, otherwise - failure
764 **/
765 PetscErrorCode MatCeedSetContextDouble(Mat mat, const char *name, double value) {
766   PetscBool      was_updated = PETSC_FALSE;
767   MatCeedContext ctx;
768 
769   PetscFunctionBeginUser;
770   PetscCall(MatShellGetContext(mat, &ctx));
771   {
772     CeedContextFieldLabel label = NULL;
773 
774     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult, name, &label));
775     if (label) {
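      // Sentinel initialization: guaranteed to differ from the new value so an update occurs if the current value cannot be read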
776       double set_value = 2 * value + 1.0;
777 
778       PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
779       if (set_value != value) {
780         PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult, label, &value));
781         was_updated = PETSC_TRUE;
782       }
783     }
784     if (ctx->op_mult_transpose) {
785       label = NULL;
786       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(ctx->op_mult_transpose, name, &label));
787       if (label) {
788         double set_value = 2 * value + 1.0;
789 
790         PetscCall(MatCeedGetContextDouble(mat, name, &set_value));
791         if (set_value != value) {
792           PetscCallCeed(ctx->ceed, CeedOperatorSetContextDouble(ctx->op_mult_transpose, label, &value));
793           was_updated = PETSC_TRUE;
794         }
795       }
796     }
797   }
798   if (was_updated) {
799     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
800     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
801   }
802   PetscFunctionReturn(PETSC_SUCCESS);
803 }
804 
805 /**
806   @brief Get the current value of a context field for a `MatCEED`.
807 
808   Not collective across MPI processes.
809 
810   @param[in]   mat    `MatCEED`
811   @param[in]   name   Name of the context field
812   @param[out]  value  Current context field value
813 
814   @return An error code: 0 - success, otherwise - failure
815 **/
816 PetscErrorCode MatCeedGetContextDouble(Mat mat, const char *name, double *value) {
817   MatCeedContext ctx;
818 
819   PetscFunctionBeginUser;
820   PetscCall(MatShellGetContext(mat, &ctx));
821   {
822     CeedContextFieldLabel label = NULL;
823     CeedOperator          op    = ctx->op_mult;
824 
825     PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
826     if (!label && ctx->op_mult_transpose) {
827       op = ctx->op_mult_transpose;
828       PetscCallCeed(ctx->ceed, CeedOperatorGetContextFieldLabel(op, name, &label));
829     }
830     if (label) {
831       PetscSizeT    num_values;
832       const double *values_ceed;
833 
834       PetscCallCeed(ctx->ceed, CeedOperatorGetContextDoubleRead(op, label, &num_values, &values_ceed));
835       *value = values_ceed[0];
836       PetscCallCeed(ctx->ceed, CeedOperatorRestoreContextDoubleRead(op, label, &values_ceed));
837     }
838   }
839   PetscFunctionReturn(PETSC_SUCCESS);
840 }
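
// A minimal sketch (hypothetical usage): update and read back a named context field.
// The name must match a field label registered on the operator's CeedQFunctionContext, here assumed to be "time".
//
//   double current_time = 0.0;
//
//   PetscCall(MatCeedSetContextDouble(J_ceed, "time", 1.5));
//   PetscCall(MatCeedGetContextDouble(J_ceed, "time", &current_time));  // current_time == 1.5 if the label exists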
841 
842 /**
843   @brief Set the current `PetscReal` value of a context field for a `MatCEED`.
844 
845   Not collective across MPI processes.
846 
847   @param[in,out]  mat    `MatCEED`
848   @param[in]      name   Name of the context field
849   @param[in]      value  New context field value
850 
851   @return An error code: 0 - success, otherwise - failure
852 **/
853 PetscErrorCode MatCeedSetContextReal(Mat mat, const char *name, PetscReal value) {
854   double value_double = value;
855 
856   PetscFunctionBeginUser;
857   PetscCall(MatCeedSetContextDouble(mat, name, value_double));
858   PetscFunctionReturn(PETSC_SUCCESS);
859 }
860 
861 /**
862   @brief Get the current `PetscReal` value of a context field for a `MatCEED`.
863 
864   Not collective across MPI processes.
865 
866   @param[in]   mat    `MatCEED`
867   @param[in]   name   Name of the context field
868   @param[out]  value  Current context field value
869 
870   @return An error code: 0 - success, otherwise - failure
871 **/
872 PetscErrorCode MatCeedGetContextReal(Mat mat, const char *name, PetscReal *value) {
873   double value_double = 0.0;
874 
875   PetscFunctionBeginUser;
876   PetscCall(MatCeedGetContextDouble(mat, name, &value_double));
877   *value = value_double;
878   PetscFunctionReturn(PETSC_SUCCESS);
879 }
880 
881 /**
882   @brief Set the current time for a `MatCEED`.
883 
884   Not collective across MPI processes.
885 
886   @param[in,out]  mat   `MatCEED`
887   @param[in]      time  Current time
888 
889   @return An error code: 0 - success, otherwise - failure
890 **/
891 PetscErrorCode MatCeedSetTime(Mat mat, PetscReal time) {
892   PetscFunctionBeginUser;
893   {
894     double time_ceed = time;
895 
896     PetscCall(MatCeedSetContextDouble(mat, "time", time_ceed));
897   }
898   PetscFunctionReturn(PETSC_SUCCESS);
899 }
900 
901 /**
902   @brief Get the current time for a `MatCEED`.
903 
904   Not collective across MPI processes.
905 
906   @param[in]   mat   `MatCEED`
907   @param[out]  time  Current time, or -1.0 if the operator has no time field
908 
909   @return An error code: 0 - success, otherwise - failure
910 **/
911 PetscErrorCode MatCeedGetTime(Mat mat, PetscReal *time) {
912   PetscFunctionBeginUser;
913   *time = -1.0;
914   {
915     double time_ceed = -1.0;
916 
917     PetscCall(MatCeedGetContextDouble(mat, "time", &time_ceed));
918     *time = time_ceed;
919   }
920   PetscFunctionReturn(PETSC_SUCCESS);
921 }
922 
923 /**
924   @brief Set the current time step for a `MatCEED`.
925 
926   Not collective across MPI processes.
927 
928   @param[in,out]  mat  `MatCEED`
929   @param[in]      dt   Current time step
930 
931   @return An error code: 0 - success, otherwise - failure
932 **/
933 PetscErrorCode MatCeedSetDt(Mat mat, PetscReal dt) {
934   PetscFunctionBeginUser;
935   {
936     double dt_ceed = dt;
937 
938     PetscCall(MatCeedSetContextDouble(mat, "dt", dt_ceed));
939   }
940   PetscFunctionReturn(PETSC_SUCCESS);
941 }
942 
943 /**
944   @brief Set the Jacobian shifts for a `MatCEED`.
945 
946   Not collective across MPI processes.
947 
948   @param[in,out]  mat      `MatCEED`
949   @param[in]      shift_v  Velocity shift
950   @param[in]      shift_a  Acceleration shift
951 
952   @return An error code: 0 - success, otherwise - failure
953 **/
954 PetscErrorCode MatCeedSetShifts(Mat mat, PetscReal shift_v, PetscReal shift_a) {
955   PetscFunctionBeginUser;
956   {
957     double shift_v_ceed = shift_v;
958 
959     PetscCall(MatCeedSetContextDouble(mat, "shift v", shift_v_ceed));
960   }
961   if (shift_a) {
962     double shift_a_ceed = shift_a;
963 
964     PetscCall(MatCeedSetContextDouble(mat, "shift a", shift_a_ceed));
965   }
966   PetscFunctionReturn(PETSC_SUCCESS);
967 }
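
// A minimal sketch (hypothetical usage): refresh the transient Jacobian context at the start of each time step,
// e.g. from a TS Jacobian callback. These wrappers map to the context field labels "time", "dt", "shift v", and "shift a".
//
//   PetscCall(MatCeedSetTime(J_ceed, time));
//   PetscCall(MatCeedSetDt(J_ceed, dt));
//   PetscCall(MatCeedSetShifts(J_ceed, shift_v, shift_a));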
968 
969 /**
970   @brief Set user context for a `MATCEED`.
971 
972   Collective across MPI processes.
973 
974   @param[in,out]  mat  `MATCEED`
975   @param[in]      f    The context destroy function, or NULL
976   @param[in]      ctx  User context, or NULL to unset
977 
978   @return An error code: 0 - success, otherwise - failure
979 **/
980 PetscErrorCode MatCeedSetContext(Mat mat, PetscErrorCode (*f)(void *), void *ctx) {
981   PetscContainer user_ctx = NULL;
982 
983   PetscFunctionBeginUser;
984   if (ctx) {
985     PetscCall(PetscContainerCreate(PetscObjectComm((PetscObject)mat), &user_ctx));
986     PetscCall(PetscContainerSetPointer(user_ctx, ctx));
987     PetscCall(PetscContainerSetUserDestroy(user_ctx, f));
988   }
989   PetscCall(PetscObjectCompose((PetscObject)mat, "MatCeed user context", (PetscObject)user_ctx));
990   PetscCall(PetscContainerDestroy(&user_ctx));
991   PetscFunctionReturn(PETSC_SUCCESS);
992 }
993 
994 /**
995   @brief Retrieve the user context for a `MATCEED`.
996 
997   Collective across MPI processes.
998 
999   @param[in,out]  mat  `MATCEED`
1000   @param[out]     ctx  User context
1001 
1002   @return An error code: 0 - success, otherwise - failure
1003 **/
1004 PetscErrorCode MatCeedGetContext(Mat mat, void *ctx) {
1005   PetscContainer user_ctx;
1006 
1007   PetscFunctionBeginUser;
1008   PetscCall(PetscObjectQuery((PetscObject)mat, "MatCeed user context", (PetscObject *)&user_ctx));
1009   if (user_ctx) PetscCall(PetscContainerGetPointer(user_ctx, (void **)ctx));
1010   else *(void **)ctx = NULL;
1011   PetscFunctionReturn(PETSC_SUCCESS);
1012 }
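
// A minimal sketch (hypothetical usage): attach user data to a MATCEED and retrieve it later, e.g. inside a custom
// matrix operation. `AppCtx` and `AppCtxDestroy` are hypothetical; the destroy callback runs when the container is destroyed.
//
//   typedef struct {
//     PetscReal scale;
//   } AppCtx;
//
//   static PetscErrorCode AppCtxDestroy(void *ctx) {
//     PetscFunctionBeginUser;
//     PetscCall(PetscFree(ctx));
//     PetscFunctionReturn(PETSC_SUCCESS);
//   }
//
//   AppCtx *app, *fetched = NULL;
//
//   PetscCall(PetscNew(&app));
//   app->scale = 2.0;
//   PetscCall(MatCeedSetContext(J_ceed, AppCtxDestroy, app));
//   PetscCall(MatCeedGetContext(J_ceed, &fetched));  // fetched == app
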
1013 /**
1014   @brief Set a user defined matrix operation for a `MATCEED` matrix.
1015 
1016   Within each user-defined routine, the user should call `MatCeedGetContext()` to obtain the user-defined context that was set by
1017   `MatCeedSetContext()`.
1018 
1019   Collective across MPI processes.
1020 
1021   @param[in,out]  mat  `MATCEED`
1022   @param[in]      op   Name of the `MatOperation`
1023   @param[in]      g    Function that provides the operation
1024 
1025   @return An error code: 0 - success, otherwise - failure
1026 **/
1027 PetscErrorCode MatCeedSetOperation(Mat mat, MatOperation op, void (*g)(void)) {
1028   PetscFunctionBeginUser;
1029   PetscCall(MatShellSetOperation(mat, op, g));
1030   PetscFunctionReturn(PETSC_SUCCESS);
1031 }
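
// A minimal sketch (hypothetical usage): override a matrix operation with a user routine that pulls the user context
// set via MatCeedSetContext(). `UserGetDiagonal` and `AppCtx` are hypothetical, as in the sketch above.
//
//   static PetscErrorCode UserGetDiagonal(Mat mat, Vec diag) {
//     AppCtx *app;
//
//     PetscFunctionBeginUser;
//     PetscCall(MatCeedGetContext(mat, &app));
//     // ... fill diag using app ...
//     PetscFunctionReturn(PETSC_SUCCESS);
//   }
//
//   PetscCall(MatCeedSetOperation(J_ceed, MATOP_GET_DIAGONAL, (void (*)(void))UserGetDiagonal));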
1032 
1033 /**
1034   @brief Set the default COO matrix type for the `MATCEED`.
1035 
1036   Collective across MPI processes.
1037 
1038   @param[in,out]  mat   `MATCEED`
1039   @param[in]      type  COO `MatType` to set
1040 
1041   @return An error code: 0 - success, otherwise - failure
1042 **/
1043 PetscErrorCode MatCeedSetCOOMatType(Mat mat, MatType type) {
1044   MatCeedContext ctx;
1045 
1046   PetscFunctionBeginUser;
1047   PetscCall(MatShellGetContext(mat, &ctx));
1048   // Check if same
1049   {
1050     size_t    len_old, len_new;
1051     PetscBool is_same = PETSC_FALSE;
1052 
1053     PetscCall(PetscStrlen(ctx->coo_mat_type, &len_old));
1054     PetscCall(PetscStrlen(type, &len_new));
1055     if (len_old == len_new) PetscCall(PetscStrncmp(ctx->coo_mat_type, type, len_old, &is_same));
1056     if (is_same) PetscFunctionReturn(PETSC_SUCCESS);
1057   }
1058   // Clean up old mats in different format
1059   // LCOV_EXCL_START
1060   if (ctx->mat_assembled_full_internal) {
1061     for (PetscInt i = 0; i < ctx->num_mats_assembled_full; i++) {
1062       if (ctx->mats_assembled_full[i] == ctx->mat_assembled_full_internal) {
1063         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_full; j++) {
1064           ctx->mats_assembled_full[j - 1] = ctx->mats_assembled_full[j];
1065         }
1066         ctx->num_mats_assembled_full--;
1067         // Note: we'll realloc this array again, so no need to shrink the allocation
1068         PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
1069       }
1070     }
1071   }
1072   if (ctx->mat_assembled_pbd_internal) {
1073     for (PetscInt i = 0; i < ctx->num_mats_assembled_pbd; i++) {
1074       if (ctx->mats_assembled_pbd[i] == ctx->mat_assembled_pbd_internal) {
1075         for (PetscInt j = i + 1; j < ctx->num_mats_assembled_pbd; j++) {
1076           ctx->mats_assembled_pbd[j - 1] = ctx->mats_assembled_pbd[j];
1077         }
1078         // Note: we'll realloc this array again, so no need to shrink the allocation
1079         ctx->num_mats_assembled_pbd--;
1080         PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
1081       }
1082     }
1083   }
1084   PetscCall(PetscFree(ctx->coo_mat_type));
1085   PetscCall(PetscStrallocpy(type, &ctx->coo_mat_type));
1086   PetscFunctionReturn(PETSC_SUCCESS);
1087   // LCOV_EXCL_STOP
1088 }
1089 
1090 /**
1091   @brief Get the default COO matrix type of the `MATCEED`.
1092 
1093   Collective across MPI processes.
1094 
1095   @param[in,out]  mat   `MATCEED`
1096   @param[out]     type  COO `MatType`
1097 
1098   @return An error code: 0 - success, otherwise - failure
1099 **/
1100 PetscErrorCode MatCeedGetCOOMatType(Mat mat, MatType *type) {
1101   MatCeedContext ctx;
1102 
1103   PetscFunctionBeginUser;
1104   PetscCall(MatShellGetContext(mat, &ctx));
1105   *type = ctx->coo_mat_type;
1106   PetscFunctionReturn(PETSC_SUCCESS);
1107 }
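
// A minimal sketch (hypothetical usage): request that future COO assemblies of this MATCEED use a device-side format.
//
//   MatType coo_type;
//
//   PetscCall(MatCeedSetCOOMatType(J_ceed, MATAIJCUSPARSE));
//   PetscCall(MatCeedGetCOOMatType(J_ceed, &coo_type));  // coo_type is now MATAIJCUSPARSE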
1108 
1109 /**
1110   @brief Set input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1111 
1112   Not collective across MPI processes.
1113 
1114   @param[in,out]  mat              `MATCEED`
1115   @param[in]      X_loc            Input PETSc local vector, or NULL
1116   @param[in]      Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1117 
1118   @return An error code: 0 - success, otherwise - failure
1119 **/
1120 PetscErrorCode MatCeedSetLocalVectors(Mat mat, Vec X_loc, Vec Y_loc_transpose) {
1121   MatCeedContext ctx;
1122 
1123   PetscFunctionBeginUser;
1124   PetscCall(MatShellGetContext(mat, &ctx));
1125   if (X_loc) {
1126     PetscInt len_old, len_new;
1127 
1128     PetscCall(VecGetSize(ctx->X_loc, &len_old));
1129     PetscCall(VecGetSize(X_loc, &len_new));
1130     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB, "new X_loc length %" PetscInt_FMT " should match old X_loc length %" PetscInt_FMT,
1131                len_new, len_old);
1132     PetscCall(VecReferenceCopy(X_loc, &ctx->X_loc));
1133   }
1134   if (Y_loc_transpose) {
1135     PetscInt len_old, len_new;
1136 
1137     PetscCall(VecGetSize(ctx->Y_loc_transpose, &len_old));
1138     PetscCall(VecGetSize(Y_loc_transpose, &len_new));
1139     PetscCheck(len_old == len_new, PETSC_COMM_SELF, PETSC_ERR_LIB,
1140                "new Y_loc_transpose length %" PetscInt_FMT " should match old Y_loc_transpose length %" PetscInt_FMT, len_new, len_old);
1141     PetscCall(VecReferenceCopy(Y_loc_transpose, &ctx->Y_loc_transpose));
1142   }
1143   PetscFunctionReturn(PETSC_SUCCESS);
1144 }
1145 
1146 /**
1147   @brief Get input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1148 
1149   Not collective across MPI processes.
1150 
1151   @param[in,out]  mat              `MATCEED`
1152   @param[out]     X_loc            Input PETSc local vector, or NULL
1153   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1154 
1155   @return An error code: 0 - success, otherwise - failure
1156 **/
1157 PetscErrorCode MatCeedGetLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1158   MatCeedContext ctx;
1159 
1160   PetscFunctionBeginUser;
1161   PetscCall(MatShellGetContext(mat, &ctx));
1162   if (X_loc) {
1163     *X_loc = NULL;
1164     PetscCall(VecReferenceCopy(ctx->X_loc, X_loc));
1165   }
1166   if (Y_loc_transpose) {
1167     *Y_loc_transpose = NULL;
1168     PetscCall(VecReferenceCopy(ctx->Y_loc_transpose, Y_loc_transpose));
1169   }
1170   PetscFunctionReturn(PETSC_SUCCESS);
1171 }
1172 
1173 /**
1174   @brief Restore input local vectors for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1175 
1176   Not collective across MPI processes.
1177 
1178   @param[in,out]  mat              MatCeed
1179   @param[out]     X_loc            Input PETSc local vector, or NULL
1180   @param[out]     Y_loc_transpose  Input PETSc local vector for transpose operation, or NULL
1181 
1182   @return An error code: 0 - success, otherwise - failure
1183 **/
1184 PetscErrorCode MatCeedRestoreLocalVectors(Mat mat, Vec *X_loc, Vec *Y_loc_transpose) {
1185   PetscFunctionBeginUser;
1186   if (X_loc) PetscCall(VecDestroy(X_loc));
1187   if (Y_loc_transpose) PetscCall(VecDestroy(Y_loc_transpose));
1188   PetscFunctionReturn(PETSC_SUCCESS);
1189 }
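
// A minimal sketch (hypothetical usage): borrow the input local vector, e.g. to inspect or update boundary values,
// then return it. Passing NULL skips the transpose-side vector.
//
//   Vec X_loc = NULL;
//
//   PetscCall(MatCeedGetLocalVectors(J_ceed, &X_loc, NULL));
//   // ... use X_loc ...
//   PetscCall(MatCeedRestoreLocalVectors(J_ceed, &X_loc, NULL));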
1190 
1191 /**
1192   @brief Get libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1193 
1194   Not collective across MPI processes.
1195 
1196   @param[in,out]  mat                MatCeed
1197   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1198   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1199 
1200   @return An error code: 0 - success, otherwise - failure
1201 **/
1202 PetscErrorCode MatCeedGetCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1203   MatCeedContext ctx;
1204 
1205   PetscFunctionBeginUser;
1206   PetscCall(MatShellGetContext(mat, &ctx));
1207   if (op_mult) {
1208     *op_mult = NULL;
1209     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult, op_mult));
1210   }
1211   if (op_mult_transpose) {
1212     *op_mult_transpose = NULL;
1213     PetscCallCeed(ctx->ceed, CeedOperatorReferenceCopy(ctx->op_mult_transpose, op_mult_transpose));
1214   }
1215   PetscFunctionReturn(PETSC_SUCCESS);
1216 }
1217 
1218 /**
1219   @brief Restore libCEED `CeedOperator` for `MATCEED` `MatMult()` and `MatMultTranspose()` operations.
1220 
1221   Not collective across MPI processes.
1222 
1223   @param[in,out]  mat                MatCeed
1224   @param[out]     op_mult            libCEED `CeedOperator` for `MatMult()`, or NULL
1225   @param[out]     op_mult_transpose  libCEED `CeedOperator` for `MatMultTranspose()`, or NULL
1226 
1227   @return An error code: 0 - success, otherwise - failure
1228 **/
1229 PetscErrorCode MatCeedRestoreCeedOperators(Mat mat, CeedOperator *op_mult, CeedOperator *op_mult_transpose) {
1230   MatCeedContext ctx;
1231 
1232   PetscFunctionBeginUser;
1233   PetscCall(MatShellGetContext(mat, &ctx));
1234   if (op_mult) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult));
1235   if (op_mult_transpose) PetscCallCeed(ctx->ceed, CeedOperatorDestroy(op_mult_transpose));
1236   PetscFunctionReturn(PETSC_SUCCESS);
1237 }
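
// A minimal sketch (hypothetical usage): borrow the forward CeedOperator, e.g. to view it or update a context field,
// then return the reference. Passing NULL skips the transpose operator.
//
//   CeedOperator op = NULL;
//
//   PetscCall(MatCeedGetCeedOperators(J_ceed, &op, NULL));
//   // ... use op ...
//   PetscCall(MatCeedRestoreCeedOperators(J_ceed, &op, NULL));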
1238 
1239 /**
1240   @brief Set `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1241 
1242   Not collective across MPI processes.
1243 
1244   @param[in,out]  mat                       MatCeed
1245   @param[in]      log_event_mult            `PetscLogEvent` for forward evaluation, or 0 to keep the current event
1246   @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or 0 to keep the current event
1247 
1248   @return An error code: 0 - success, otherwise - failure
1249 **/
1250 PetscErrorCode MatCeedSetLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1251   MatCeedContext ctx;
1252 
1253   PetscFunctionBeginUser;
1254   PetscCall(MatShellGetContext(mat, &ctx));
1255   if (log_event_mult) ctx->log_event_mult = log_event_mult;
1256   if (log_event_mult_transpose) ctx->log_event_mult_transpose = log_event_mult_transpose;
1257   PetscFunctionReturn(PETSC_SUCCESS);
1258 }
1259 
1260 /**
1261   @brief Get `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1262 
1263   Not collective across MPI processes.
1264 
1265   @param[in,out]  mat                       MatCeed
1266   @param[out]     log_event_mult            `PetscLogEvent` for forward evaluation, or NULL
1267   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose evaluation, or NULL
1268 
1269   @return An error code: 0 - success, otherwise - failure
1270 **/
1271 PetscErrorCode MatCeedGetLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1272   MatCeedContext ctx;
1273 
1274   PetscFunctionBeginUser;
1275   PetscCall(MatShellGetContext(mat, &ctx));
1276   if (log_event_mult) *log_event_mult = ctx->log_event_mult;
1277   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_mult_transpose;
1278   PetscFunctionReturn(PETSC_SUCCESS);
1279 }
1280 
1281 /**
1282   @brief Set `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1283 
1284   Not collective across MPI processes.
1285 
1286   @param[in,out]  mat                       MatCeed
1287   @param[in]      log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or 0 to keep the current event
1288   @param[in]      log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or 0 to keep the current event
1289 
1290   @return An error code: 0 - success, otherwise - failure
1291 **/
1292 PetscErrorCode MatCeedSetCeedOperatorLogEvents(Mat mat, PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose) {
1293   MatCeedContext ctx;
1294 
1295   PetscFunctionBeginUser;
1296   PetscCall(MatShellGetContext(mat, &ctx));
1297   if (log_event_mult) ctx->log_event_ceed_mult = log_event_mult;
1298   if (log_event_mult_transpose) ctx->log_event_ceed_mult_transpose = log_event_mult_transpose;
1299   PetscFunctionReturn(PETSC_SUCCESS);
1300 }
1301 
1302 /**
1303   @brief Get `CeedOperator` `PetscLogEvent` for `MATCEED` `MatMult()` and `MatMultTranspose()` operators.
1304 
1305   Not collective across MPI processes.
1306 
1307   @param[in,out]  mat                       MatCeed
1308   @param[out]     log_event_mult            `PetscLogEvent` for forward `CeedOperator` evaluation, or NULL
1309   @param[out]     log_event_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation, or NULL
1310 
1311   @return An error code: 0 - success, otherwise - failure
1312 **/
1313 PetscErrorCode MatCeedGetCeedOperatorLogEvents(Mat mat, PetscLogEvent *log_event_mult, PetscLogEvent *log_event_mult_transpose) {
1314   MatCeedContext ctx;
1315 
1316   PetscFunctionBeginUser;
1317   PetscCall(MatShellGetContext(mat, &ctx));
1318   if (log_event_mult) *log_event_mult = ctx->log_event_ceed_mult;
1319   if (log_event_mult_transpose) *log_event_mult_transpose = ctx->log_event_ceed_mult_transpose;
1320   PetscFunctionReturn(PETSC_SUCCESS);
1321 }
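
// Illustrative usage sketch: the `CeedOperator` events nest inside the `MatMult()`/`MatMultTranspose()` events above,
// so attaching separate events makes it possible to split total multiply time from the time spent inside
// `CeedOperatorApplyAdd()`. Names below are hypothetical.
//
//   PetscLogEvent event_ceed_mult, event_ceed_mult_transpose;
//
//   PetscCall(PetscLogEventRegister("JacobianMultCeed", MAT_CLASSID, &event_ceed_mult));
//   PetscCall(PetscLogEventRegister("JacobianMultTCeed", MAT_CLASSID, &event_ceed_mult_transpose));
//   PetscCall(MatCeedSetCeedOperatorLogEvents(mat_jacobian, event_ceed_mult, event_ceed_mult_transpose));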
1322 
1323 // -----------------------------------------------------------------------------
1324 // Operator context data
1325 // -----------------------------------------------------------------------------
1326 
1327 /**
1328   @brief Setup context data for operator application.
1329 
1330   Collective across MPI processes.
1331 
1332   @param[in]   dm_x                           Input `DM`
1333   @param[in]   dm_y                           Output `DM`
1334   @param[in]   X_loc                          Input PETSc local vector, or NULL
1335   @param[in]   Y_loc_transpose                Input PETSc local vector for transpose operation, or NULL
1336   @param[in]   op_mult                        `CeedOperator` for forward evaluation
1337   @param[in]   op_mult_transpose              `CeedOperator` for transpose evaluation
1338   @param[in]   log_event_mult                 `PetscLogEvent` for forward evaluation
1339   @param[in]   log_event_mult_transpose       `PetscLogEvent` for transpose evaluation
1340   @param[in]   log_event_ceed_mult            `PetscLogEvent` for forward `CeedOperator` evaluation
1341   @param[in]   log_event_ceed_mult_transpose  `PetscLogEvent` for transpose `CeedOperator` evaluation
1342   @param[out]  ctx                            Context data for operator evaluation
1343 
1344   @return An error code: 0 - success, otherwise - failure
1345 **/
1346 PetscErrorCode MatCeedContextCreate(DM dm_x, DM dm_y, Vec X_loc, Vec Y_loc_transpose, CeedOperator op_mult, CeedOperator op_mult_transpose,
1347                                     PetscLogEvent log_event_mult, PetscLogEvent log_event_mult_transpose, PetscLogEvent log_event_ceed_mult,
1348                                     PetscLogEvent log_event_ceed_mult_transpose, MatCeedContext *ctx) {
1349   CeedSize x_loc_len, y_loc_len;
1350 
1351   PetscFunctionBeginUser;
1352 
1353   // Allocate
1354   PetscCall(PetscNew(ctx));
1355   (*ctx)->ref_count = 1;
1356 
1357   // Logging
1358   (*ctx)->log_event_mult                = log_event_mult;
1359   (*ctx)->log_event_mult_transpose      = log_event_mult_transpose;
1360   (*ctx)->log_event_ceed_mult           = log_event_ceed_mult;
1361   (*ctx)->log_event_ceed_mult_transpose = log_event_ceed_mult_transpose;
1362 
1363   // PETSc objects
1364   PetscCall(DMReferenceCopy(dm_x, &(*ctx)->dm_x));
1365   PetscCall(DMReferenceCopy(dm_y, &(*ctx)->dm_y));
1366   if (X_loc) PetscCall(VecReferenceCopy(X_loc, &(*ctx)->X_loc));
1367   if (Y_loc_transpose) PetscCall(VecReferenceCopy(Y_loc_transpose, &(*ctx)->Y_loc_transpose));
1368 
1369   // Memtype
1370   {
1371     const PetscScalar *x;
1372     Vec                X;
1373 
1374     PetscCall(DMGetLocalVector(dm_x, &X));
1375     PetscCall(VecGetArrayReadAndMemType(X, &x, &(*ctx)->mem_type));
1376     PetscCall(VecRestoreArrayReadAndMemType(X, &x));
1377     PetscCall(DMRestoreLocalVector(dm_x, &X));
1378   }
1379 
1380   // libCEED objects
1381   PetscCheck(CeedOperatorGetCeed(op_mult, &(*ctx)->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB,
1382              "retrieving Ceed context object failed");
1383   PetscCallCeed((*ctx)->ceed, CeedReference((*ctx)->ceed));
1384   PetscCallCeed((*ctx)->ceed, CeedOperatorGetActiveVectorLengths(op_mult, &x_loc_len, &y_loc_len));
1385   PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult, &(*ctx)->op_mult));
1386   if (op_mult_transpose) PetscCallCeed((*ctx)->ceed, CeedOperatorReferenceCopy(op_mult_transpose, &(*ctx)->op_mult_transpose));
1387   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, x_loc_len, &(*ctx)->x_loc));
1388   PetscCallCeed((*ctx)->ceed, CeedVectorCreate((*ctx)->ceed, y_loc_len, &(*ctx)->y_loc));
1389 
1390   // Flop counting
1391   {
1392     CeedSize ceed_flops_estimate = 0;
1393 
1394     PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult, &ceed_flops_estimate));
1395     (*ctx)->flops_mult = ceed_flops_estimate;
1396     if (op_mult_transpose) {
1397       PetscCallCeed((*ctx)->ceed, CeedOperatorGetFlopsEstimate(op_mult_transpose, &ceed_flops_estimate));
1398       (*ctx)->flops_mult_transpose = ceed_flops_estimate;
1399     }
1400   }
1401 
1402   // Check sizes
1403   if (x_loc_len > 0 || y_loc_len > 0) {
1404     CeedSize ctx_x_loc_len, ctx_y_loc_len;
1405     PetscInt X_loc_len, dm_x_loc_len, Y_loc_len, dm_y_loc_len;
1406     Vec      dm_X_loc, dm_Y_loc;
1407 
1408     // -- Input
1409     PetscCall(DMGetLocalVector(dm_x, &dm_X_loc));
1410     PetscCall(VecGetLocalSize(dm_X_loc, &dm_x_loc_len));
1411     PetscCall(DMRestoreLocalVector(dm_x, &dm_X_loc));
1412     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->x_loc, &ctx_x_loc_len));
1413     if (X_loc) {
1414       PetscCall(VecGetLocalSize(X_loc, &X_loc_len));
1415       PetscCheck(X_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1416                  "X_loc (%" PetscInt_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions", X_loc_len, dm_x_loc_len);
1417     }
1418     PetscCheck(x_loc_len == dm_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_x (%" PetscInt_FMT ") dimensions",
1419                x_loc_len, dm_x_loc_len);
1420     PetscCheck(x_loc_len == ctx_x_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "x_loc (%" CeedSize_FMT ") must match op dimensions (%" CeedSize_FMT ")",
1421                x_loc_len, ctx_x_loc_len);
1422 
1423     // -- Output
1424     PetscCall(DMGetLocalVector(dm_y, &dm_Y_loc));
1425     PetscCall(VecGetLocalSize(dm_Y_loc, &dm_y_loc_len));
1426     PetscCall(DMRestoreLocalVector(dm_y, &dm_Y_loc));
1427     PetscCallCeed((*ctx)->ceed, CeedVectorGetLength((*ctx)->y_loc, &ctx_y_loc_len));
1428     PetscCheck(ctx_y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB, "op (%" CeedSize_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions",
1429                ctx_y_loc_len, dm_y_loc_len);
1430 
1431     // -- Transpose
1432     if (Y_loc_transpose) {
1433       PetscCall(VecGetLocalSize(Y_loc_transpose, &Y_loc_len));
1434       PetscCheck(Y_loc_len == dm_y_loc_len, PETSC_COMM_SELF, PETSC_ERR_LIB,
1435                  "Y_loc_transpose (%" PetscInt_FMT ") must match dm_y (%" PetscInt_FMT ") dimensions", Y_loc_len, dm_y_loc_len);
1436     }
1437   }
1438   PetscFunctionReturn(PETSC_SUCCESS);
1439 }
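
// Illustrative sketch (hypothetical names): most users receive a context implicitly from the `MATCEED` constructor, but
// one can also be built directly for a hand-rolled `MATSHELL`. Here `dm`, `op_apply`, `op_apply_transpose`, and
// `mat_shell` are assumed to exist already, and the module's default log events (already registered) are reused.
//
//   MatCeedContext ctx;
//
//   PetscCall(MatCeedContextCreate(dm, dm, NULL, NULL, op_apply, op_apply_transpose, MATCEED_MULT, MATCEED_MULT_TRANSPOSE,
//                                  MATCEED_MULT_CEEDOP, MATCEED_MULT_TRANSPOSE_CEEDOP, &ctx));
//   PetscCall(MatShellSetContext(mat_shell, ctx));
//   PetscCall(MatShellSetOperation(mat_shell, MATOP_MULT, (void (*)(void))MatMult_Ceed));
//   PetscCall(MatShellSetOperation(mat_shell, MATOP_MULT_TRANSPOSE, (void (*)(void))MatMultTranspose_Ceed));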
1440 
1441 /**
1442   @brief Increment reference counter for `MATCEED` context.
1443 
1444   Not collective across MPI processes.
1445 
1446   @param[in,out]  ctx  Context data
1447 
1448   @return An error code: 0 - success, otherwise - failure
1449 **/
1450 PetscErrorCode MatCeedContextReference(MatCeedContext ctx) {
1451   PetscFunctionBeginUser;
1452   ctx->ref_count++;
1453   PetscFunctionReturn(PETSC_SUCCESS);
1454 }
1455 
1456 /**
1457   @brief Copy a reference to a `MATCEED` context.
1458          Note: `*ctx_copy`, if non-NULL, is assumed to be a valid `MatCeedContext` and is destroyed before being replaced by the new reference.
1459 
1460   Not collective across MPI processes.
1461 
1462   @param[in]   ctx       Context data
1463   @param[out]  ctx_copy  Copy of pointer to context data
1464 
1465   @return An error code: 0 - success, otherwise - failure
1466 **/
1467 PetscErrorCode MatCeedContextReferenceCopy(MatCeedContext ctx, MatCeedContext *ctx_copy) {
1468   PetscFunctionBeginUser;
1469   PetscCall(MatCeedContextReference(ctx));
1470   PetscCall(MatCeedContextDestroy(*ctx_copy));
1471   *ctx_copy = ctx;
1472   PetscFunctionReturn(PETSC_SUCCESS);
1473 }
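
// Illustrative sketch of the reference-counting semantics: copying a reference increments the count and each
// `MatCeedContextDestroy()` decrements it, so the context is freed only when the last reference is released.
// `ctx_a` is hypothetical.
//
//   MatCeedContext ctx_b = NULL;
//
//   PetscCall(MatCeedContextReferenceCopy(ctx_a, &ctx_b));  // ctx_b now aliases ctx_a
//   PetscCall(MatCeedContextDestroy(ctx_b));                // decrements the count; ctx_a remains valid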
1474 
1475 /**
1476   @brief Destroy context data for operator application.
1477 
1478   Collective across MPI processes.
1479 
1480   @param[in,out]  ctx  Context data for operator evaluation
1481 
1482   @return An error code: 0 - success, otherwise - failure
1483 **/
1484 PetscErrorCode MatCeedContextDestroy(MatCeedContext ctx) {
1485   PetscFunctionBeginUser;
1486   if (!ctx || --ctx->ref_count > 0) PetscFunctionReturn(PETSC_SUCCESS);
1487 
1488   // PETSc objects
1489   PetscCall(DMDestroy(&ctx->dm_x));
1490   PetscCall(DMDestroy(&ctx->dm_y));
1491   PetscCall(VecDestroy(&ctx->X_loc));
1492   PetscCall(VecDestroy(&ctx->Y_loc_transpose));
1493   PetscCall(MatDestroy(&ctx->mat_assembled_full_internal));
1494   PetscCall(MatDestroy(&ctx->mat_assembled_pbd_internal));
1495   PetscCall(PetscFree(ctx->coo_mat_type));
1496   PetscCall(PetscFree(ctx->mats_assembled_full));
1497   PetscCall(PetscFree(ctx->mats_assembled_pbd));
1498 
1499   // libCEED objects
1500   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->x_loc));
1501   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->y_loc));
1502   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_full));
1503   PetscCallCeed(ctx->ceed, CeedVectorDestroy(&ctx->coo_values_pbd));
1504   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult));
1505   PetscCallCeed(ctx->ceed, CeedOperatorDestroy(&ctx->op_mult_transpose));
1506   PetscCheck(CeedDestroy(&ctx->ceed) == CEED_ERROR_SUCCESS, PETSC_COMM_SELF, PETSC_ERR_LIB, "destroying libCEED context object failed");
1507 
1508   // Deallocate
1509   ctx->is_destroyed = PETSC_TRUE;  // Flag as destroyed in case someone has stale ref
1510   PetscCall(PetscFree(ctx));
1511   PetscFunctionReturn(PETSC_SUCCESS);
1512 }
1513 
1514 /**
1515   @brief Compute the diagonal of an operator via libCEED.
1516 
1517   Collective across MPI processes.
1518 
1519   @param[in]   A  `MATCEED`
1520   @param[out]  D  Vector holding operator diagonal
1521 
1522   @return An error code: 0 - success, otherwise - failure
1523 **/
1524 PetscErrorCode MatGetDiagonal_Ceed(Mat A, Vec D) {
1525   PetscMemType   mem_type;
1526   Vec            D_loc;
1527   MatCeedContext ctx;
1528 
1529   PetscFunctionBeginUser;
1530   PetscCall(MatShellGetContext(A, &ctx));
1531 
1532   // Place PETSc vector in libCEED vector
1533   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1534   PetscCall(DMGetLocalVector(ctx->dm_x, &D_loc));
1535   PetscCall(VecPetscToCeed(D_loc, &mem_type, ctx->x_loc));
1536 
1537   // Compute Diagonal
1538   PetscCall(PetscLogEventBegin(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1539   PetscCallCeed(ctx->ceed, CeedOperatorLinearAssembleDiagonal(ctx->op_mult, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1540   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL_CEEDOP, A, D, NULL, NULL));
1541 
1542   // Restore PETSc vector
1543   PetscCall(VecCeedToPetsc(ctx->x_loc, mem_type, D_loc));
1544 
1545   // Local-to-Global
1546   PetscCall(VecZeroEntries(D));
1547   PetscCall(DMLocalToGlobal(ctx->dm_x, D_loc, ADD_VALUES, D));
1548   PetscCall(DMRestoreLocalVector(ctx->dm_x, &D_loc));
1549   PetscCall(PetscLogEventEnd(MATCEED_ASSEMBLE_DIAGONAL, A, D, NULL, NULL));
1550   PetscFunctionReturn(PETSC_SUCCESS);
1551 }
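
// Illustrative usage sketch: with `MatGetDiagonal_Ceed` registered as the shell's `MATOP_GET_DIAGONAL` (assumed here),
// point-Jacobi style preconditioning works without assembling the full operator. `A` is assumed to be a `MATCEED`.
//
//   Vec diag;
//
//   PetscCall(MatCreateVecs(A, &diag, NULL));
//   PetscCall(MatGetDiagonal(A, diag));  // dispatches to MatGetDiagonal_Ceed
//   PetscCall(VecReciprocal(diag));      // e.g. D^{-1} for a Jacobi sweep
//   PetscCall(VecDestroy(&diag));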
1552 
1553 /**
1554   @brief Compute `A X = Y` for a `MATCEED`.
1555 
1556   Collective across MPI processes.
1557 
1558   @param[in]   A  `MATCEED`
1559   @param[in]   X  Input PETSc vector
1560   @param[out]  Y  Output PETSc vector
1561 
1562   @return An error code: 0 - success, otherwise - failure
1563 **/
1564 PetscErrorCode MatMult_Ceed(Mat A, Vec X, Vec Y) {
1565   MatCeedContext ctx;
1566 
1567   PetscFunctionBeginUser;
1568   PetscCall(MatShellGetContext(A, &ctx));
1569   PetscCall(PetscLogEventBegin(ctx->log_event_mult, A, X, Y, NULL));
1570 
1571   {
1572     PetscMemType x_mem_type, y_mem_type;
1573     Vec          X_loc = ctx->X_loc, Y_loc;
1574 
1575     // Get local vectors
1576     if (!ctx->X_loc) PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1577     PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1578 
1579     // Global-to-local
1580     PetscCall(DMGlobalToLocal(ctx->dm_x, X, INSERT_VALUES, X_loc));
1581 
1582     // Setup libCEED vectors
1583     PetscCall(VecReadPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1584     PetscCall(VecZeroEntries(Y_loc));
1585     PetscCall(VecPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1586 
1587     // Apply libCEED operator
1588     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult, A, X, Y, NULL));
1589     PetscCall(PetscLogGpuTimeBegin());
1590     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult, ctx->x_loc, ctx->y_loc, CEED_REQUEST_IMMEDIATE));
1591     PetscCall(PetscLogGpuTimeEnd());
1592     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult, A, X, Y, NULL));
1593 
1594     // Restore PETSc vectors
1595     PetscCall(VecReadCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1596     PetscCall(VecCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1597 
1598     // Local-to-global
1599     PetscCall(VecZeroEntries(Y));
1600     PetscCall(DMLocalToGlobal(ctx->dm_y, Y_loc, ADD_VALUES, Y));
1601 
1602     // Restore local vectors, as needed
1603     if (!ctx->X_loc) PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1604     PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1605   }
1606 
1607   // Log flops
1608   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult));
1609   else PetscCall(PetscLogFlops(ctx->flops_mult));
1610   PetscCall(PetscLogEventEnd(ctx->log_event_mult, A, X, Y, NULL));
1611   PetscFunctionReturn(PETSC_SUCCESS);
1612 }
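
// Illustrative usage sketch: the forward action is applied matrix-free through the libCEED operator; no assembled
// matrix is formed. `A` is assumed to be a `MATCEED`.
//
//   Vec X, Y;
//
//   PetscCall(MatCreateVecs(A, &X, &Y));
//   PetscCall(VecSet(X, 1.0));
//   PetscCall(MatMult(A, X, Y));  // dispatches to MatMult_Ceed
//   PetscCall(VecDestroy(&X));
//   PetscCall(VecDestroy(&Y));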
1613 
1614 /**
1615   @brief Compute `A^T Y = X` for a `MATCEED`.
1616 
1617   Collective across MPI processes.
1618 
1619   @param[in]   A  `MATCEED`
1620   @param[in]   Y  Input PETSc vector
1621   @param[out]  X  Output PETSc vector
1622 
1623   @return An error code: 0 - success, otherwise - failure
1624 **/
1625 PetscErrorCode MatMultTranspose_Ceed(Mat A, Vec Y, Vec X) {
1626   MatCeedContext ctx;
1627 
1628   PetscFunctionBeginUser;
1629   PetscCall(MatShellGetContext(A, &ctx));
1630   PetscCall(PetscLogEventBegin(ctx->log_event_mult_transpose, A, Y, X, NULL));
1631 
1632   {
1633     PetscMemType x_mem_type, y_mem_type;
1634     Vec          X_loc, Y_loc = ctx->Y_loc_transpose;
1635 
1636     // Get local vectors
1637     if (!ctx->Y_loc_transpose) PetscCall(DMGetLocalVector(ctx->dm_y, &Y_loc));
1638     PetscCall(DMGetLocalVector(ctx->dm_x, &X_loc));
1639 
1640     // Global-to-local
1641     PetscCall(DMGlobalToLocal(ctx->dm_y, Y, INSERT_VALUES, Y_loc));
1642 
1643     // Setup libCEED vectors
1644     PetscCall(VecReadPetscToCeed(Y_loc, &y_mem_type, ctx->y_loc));
1645     PetscCall(VecZeroEntries(X_loc));
1646     PetscCall(VecPetscToCeed(X_loc, &x_mem_type, ctx->x_loc));
1647 
1648     // Apply libCEED operator
1649     PetscCall(PetscLogEventBegin(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1650     PetscCall(PetscLogGpuTimeBegin());
1651     PetscCallCeed(ctx->ceed, CeedOperatorApplyAdd(ctx->op_mult_transpose, ctx->y_loc, ctx->x_loc, CEED_REQUEST_IMMEDIATE));
1652     PetscCall(PetscLogGpuTimeEnd());
1653     PetscCall(PetscLogEventEnd(ctx->log_event_ceed_mult_transpose, A, Y, X, NULL));
1654 
1655     // Restore PETSc vectors
1656     PetscCall(VecReadCeedToPetsc(ctx->y_loc, y_mem_type, Y_loc));
1657     PetscCall(VecCeedToPetsc(ctx->x_loc, x_mem_type, X_loc));
1658 
1659     // Local-to-global
1660     PetscCall(VecZeroEntries(X));
1661     PetscCall(DMLocalToGlobal(ctx->dm_x, X_loc, ADD_VALUES, X));
1662 
1663     // Restore local vectors, as needed
1664     if (!ctx->Y_loc_transpose) PetscCall(DMRestoreLocalVector(ctx->dm_y, &Y_loc));
1665     PetscCall(DMRestoreLocalVector(ctx->dm_x, &X_loc));
1666   }
1667 
1668   // Log flops
1669   if (PetscMemTypeDevice(ctx->mem_type)) PetscCall(PetscLogGpuFlops(ctx->flops_mult_transpose));
1670   else PetscCall(PetscLogFlops(ctx->flops_mult_transpose));
1671   PetscCall(PetscLogEventEnd(ctx->log_event_mult_transpose, A, Y, X, NULL));
1672   PetscFunctionReturn(PETSC_SUCCESS);
1673 }
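
// Illustrative usage sketch: the transpose action requires that a transpose `CeedOperator` was supplied when the
// `MATCEED` was created. `A`, `Y`, and `X` are assumed to exist with layouts matching `dm_y` and `dm_x`, respectively.
//
//   PetscCall(MatMultTranspose(A, Y, X));  // dispatches to MatMultTranspose_Ceed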
1674