xref: /petsc/src/snes/tests/ex13.c (revision e0b20f2ab553d13822b7e07fd5b8e1e425cbac3a)
/* Help text handed to PetscInitialize() in main(). */
static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n"
                     "We solve the Poisson problem in a rectangular domain\n"
                     "using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n";
45e1f5104SMark 
55e1f5104SMark #include <petscdmplex.h>
65e1f5104SMark #include <petscsnes.h>
75e1f5104SMark #include <petscds.h>
85e1f5104SMark #include <petscconvest.h>
9e6f8f311SMark Adams #if defined(PETSC_HAVE_AMGX)
10e6f8f311SMark Adams   #include <amgx_c.h>
11e6f8f311SMark Adams #endif
125e1f5104SMark 
135e1f5104SMark typedef struct {
14f9244615SMatthew G. Knepley   PetscInt  nit;    /* Number of benchmark iterations */
15f9244615SMatthew G. Knepley   PetscBool strong; /* Do not integrate the Laplacian by parts */
165e1f5104SMark } AppCtx;
175e1f5104SMark 
18d71ae5a4SJacob Faibussowitsch static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
19d71ae5a4SJacob Faibussowitsch {
205e1f5104SMark   PetscInt d;
215e1f5104SMark   *u = 0.0;
225e1f5104SMark   for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]);
235e1f5104SMark   return 0;
245e1f5104SMark }
255e1f5104SMark 
26d71ae5a4SJacob Faibussowitsch static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
27d71ae5a4SJacob Faibussowitsch {
285e1f5104SMark   PetscInt d;
295e1f5104SMark   for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]);
305e1f5104SMark }
315e1f5104SMark 
32d71ae5a4SJacob Faibussowitsch static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[])
33d71ae5a4SJacob Faibussowitsch {
345e1f5104SMark   PetscInt d;
355e1f5104SMark   for (d = 0; d < dim; ++d) f1[d] = u_x[d];
365e1f5104SMark }
375e1f5104SMark 
38d71ae5a4SJacob Faibussowitsch static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[])
39d71ae5a4SJacob Faibussowitsch {
405e1f5104SMark   PetscInt d;
415e1f5104SMark   for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0;
425e1f5104SMark }
435e1f5104SMark 
44d71ae5a4SJacob Faibussowitsch static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx)
45d71ae5a4SJacob Faibussowitsch {
46f9244615SMatthew G. Knepley   *u = PetscSqr(x[0]) + PetscSqr(x[1]);
47f9244615SMatthew G. Knepley   return 0;
48f9244615SMatthew G. Knepley }
49f9244615SMatthew G. Knepley 
50d71ae5a4SJacob Faibussowitsch static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[])
51d71ae5a4SJacob Faibussowitsch {
52f9244615SMatthew G. Knepley   PetscInt d;
53f9244615SMatthew G. Knepley   for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d];
54f9244615SMatthew G. Knepley   f0[0] += 4.0;
55f9244615SMatthew G. Knepley }
56f9244615SMatthew G. Knepley 
57d71ae5a4SJacob Faibussowitsch static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options)
58d71ae5a4SJacob Faibussowitsch {
595e1f5104SMark   PetscFunctionBeginUser;
600c569c6eSMark   options->nit    = 10;
61f9244615SMatthew G. Knepley   options->strong = PETSC_FALSE;
62d0609cedSBarry Smith   PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");
639566063dSJacob Faibussowitsch   PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL));
649566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL));
65d0609cedSBarry Smith   PetscOptionsEnd();
665e1f5104SMark   PetscFunctionReturn(0);
675e1f5104SMark }
685e1f5104SMark 
69d71ae5a4SJacob Faibussowitsch static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm)
70d71ae5a4SJacob Faibussowitsch {
715e1f5104SMark   PetscFunctionBeginUser;
729566063dSJacob Faibussowitsch   PetscCall(DMCreate(comm, dm));
739566063dSJacob Faibussowitsch   PetscCall(DMSetType(*dm, DMPLEX));
749566063dSJacob Faibussowitsch   PetscCall(DMSetFromOptions(*dm));
759566063dSJacob Faibussowitsch   PetscCall(DMSetApplicationContext(*dm, user));
769566063dSJacob Faibussowitsch   PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view"));
775e1f5104SMark   PetscFunctionReturn(0);
785e1f5104SMark }
795e1f5104SMark 
80d71ae5a4SJacob Faibussowitsch static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user)
81d71ae5a4SJacob Faibussowitsch {
82f9244615SMatthew G. Knepley   PetscDS        ds;
8345480ffeSMatthew G. Knepley   DMLabel        label;
845e1f5104SMark   const PetscInt id = 1;
855e1f5104SMark 
865e1f5104SMark   PetscFunctionBeginUser;
879566063dSJacob Faibussowitsch   PetscCall(DMGetDS(dm, &ds));
889566063dSJacob Faibussowitsch   PetscCall(DMGetLabel(dm, "marker", &label));
89f9244615SMatthew G. Knepley   if (user->strong) {
909566063dSJacob Faibussowitsch     PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL));
919566063dSJacob Faibussowitsch     PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user));
929566063dSJacob Faibussowitsch     PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))quadratic_u, NULL, user, NULL));
93f9244615SMatthew G. Knepley   } else {
949566063dSJacob Faibussowitsch     PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u));
959566063dSJacob Faibussowitsch     PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu));
969566063dSJacob Faibussowitsch     PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user));
979566063dSJacob Faibussowitsch     PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void))trig_u, NULL, user, NULL));
98f9244615SMatthew G. Knepley   }
995e1f5104SMark   PetscFunctionReturn(0);
1005e1f5104SMark }
1015e1f5104SMark 
102d71ae5a4SJacob Faibussowitsch static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user)
103d71ae5a4SJacob Faibussowitsch {
1045e1f5104SMark   DM             cdm = dm;
1055e1f5104SMark   PetscFE        fe;
1065e1f5104SMark   DMPolytopeType ct;
1075e1f5104SMark   PetscBool      simplex;
1085e1f5104SMark   PetscInt       dim, cStart;
1095e1f5104SMark   char           prefix[PETSC_MAX_PATH_LEN];
1105e1f5104SMark 
1115e1f5104SMark   PetscFunctionBeginUser;
1129566063dSJacob Faibussowitsch   PetscCall(DMGetDimension(dm, &dim));
1139566063dSJacob Faibussowitsch   PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL));
1149566063dSJacob Faibussowitsch   PetscCall(DMPlexGetCellType(dm, cStart, &ct));
1152e776fa0SMark Adams   simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; // false
1165e1f5104SMark   /* Create finite element */
1179566063dSJacob Faibussowitsch   PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name));
1189566063dSJacob Faibussowitsch   PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe));
1199566063dSJacob Faibussowitsch   PetscCall(PetscObjectSetName((PetscObject)fe, name));
1205e1f5104SMark   /* Set discretization and boundary conditions for each mesh */
1219566063dSJacob Faibussowitsch   PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe));
1229566063dSJacob Faibussowitsch   PetscCall(DMCreateDS(dm));
1239566063dSJacob Faibussowitsch   PetscCall((*setup)(dm, user));
1245e1f5104SMark   while (cdm) {
1259566063dSJacob Faibussowitsch     PetscCall(DMCopyDisc(dm, cdm));
1265e1f5104SMark     /* TODO: Check whether the boundary of coarse meshes is marked */
1279566063dSJacob Faibussowitsch     PetscCall(DMGetCoarseDM(cdm, &cdm));
1285e1f5104SMark   }
1299566063dSJacob Faibussowitsch   PetscCall(PetscFEDestroy(&fe));
1305e1f5104SMark   PetscFunctionReturn(0);
1315e1f5104SMark }
1325e1f5104SMark 
133d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv)
134d71ae5a4SJacob Faibussowitsch {
1355e1f5104SMark   DM             dm;   /* Problem specification */
1365e1f5104SMark   SNES           snes; /* Nonlinear solver */
1375e1f5104SMark   Vec            u;    /* Solutions */
1385e1f5104SMark   AppCtx         user; /* User-defined work context */
1392e776fa0SMark Adams   PetscLogDouble time;
1402e776fa0SMark Adams   Mat            Amat;
1415e1f5104SMark 
142327415f7SBarry Smith   PetscFunctionBeginUser;
1439566063dSJacob Faibussowitsch   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
1449566063dSJacob Faibussowitsch   PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user));
1452e776fa0SMark Adams   /* system */
1469566063dSJacob Faibussowitsch   PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes));
1479566063dSJacob Faibussowitsch   PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm));
1489566063dSJacob Faibussowitsch   PetscCall(SNESSetDM(snes, dm));
1499566063dSJacob Faibussowitsch   PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user));
1509566063dSJacob Faibussowitsch   PetscCall(DMCreateGlobalVector(dm, &u));
151*e0b20f2aSMark Adams   {
152*e0b20f2aSMark Adams     PetscInt N;
153*e0b20f2aSMark Adams     PetscCall(VecGetSize(u, &N));
154*e0b20f2aSMark Adams     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number equations N = %" PetscInt_FMT "\n", N));
155*e0b20f2aSMark Adams   }
1562e776fa0SMark Adams   PetscCall(SNESSetFromOptions(snes));
1579566063dSJacob Faibussowitsch   PetscCall(PetscObjectSetName((PetscObject)u, "potential"));
1589566063dSJacob Faibussowitsch   PetscCall(DMPlexSetSNESLocalFEM(dm, &user, &user, &user));
1599566063dSJacob Faibussowitsch   PetscCall(DMSNESCheckFromOptions(snes, u));
1602e776fa0SMark Adams   PetscCall(PetscTime(&time));
1612e776fa0SMark Adams   PetscCall(SNESSetUp(snes));
162e6f8f311SMark Adams #if defined(PETSC_HAVE_AMGX)
163e6f8f311SMark Adams   KSP                   ksp;
164e6f8f311SMark Adams   PC                    pc;
165e6f8f311SMark Adams   PetscBool             flg;
166e6f8f311SMark Adams   AMGX_resources_handle rsc;
167e6f8f311SMark Adams   PetscCall(SNESGetKSP(snes, &ksp));
168e6f8f311SMark Adams   PetscCall(KSPGetPC(ksp, &pc));
169e6f8f311SMark Adams   PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg));
170e6f8f311SMark Adams   if (flg) {
171e6f8f311SMark Adams     PetscCall(PCAmgXGetResources(pc, (void *)&rsc));
172e6f8f311SMark Adams     /* do ... with resource */
173e6f8f311SMark Adams   }
174e6f8f311SMark Adams #endif
1752e776fa0SMark Adams   PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL));
1762e776fa0SMark Adams   PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE));
177b94d7dedSBarry Smith   PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE));
1789566063dSJacob Faibussowitsch   PetscCall(SNESSolve(snes, NULL, u));
1792e776fa0SMark Adams   PetscCall(PetscTimeSubtract(&time));
1805e1f5104SMark   /* Benchmark system */
1810c569c6eSMark   if (user.nit) {
1825e1f5104SMark     Vec      b;
1830c569c6eSMark     PetscInt i;
1842e776fa0SMark Adams #if defined(PETSC_USE_LOG)
1852e776fa0SMark Adams     PetscLogStage kspstage;
1862e776fa0SMark Adams #endif
1872e776fa0SMark Adams     PetscCall(PetscLogStageRegister("Solve only", &kspstage));
1889566063dSJacob Faibussowitsch     PetscCall(PetscLogStagePush(kspstage));
1892e776fa0SMark Adams     PetscCall(SNESGetSolution(snes, &u));
1902e776fa0SMark Adams     PetscCall(SNESGetFunction(snes, &b, NULL, NULL));
1910c569c6eSMark     for (i = 0; i < user.nit; i++) {
1929566063dSJacob Faibussowitsch       PetscCall(VecZeroEntries(u));
1932e776fa0SMark Adams       PetscCall(SNESSolve(snes, NULL, u));
1940c569c6eSMark     }
1959566063dSJacob Faibussowitsch     PetscCall(PetscLogStagePop());
1965e1f5104SMark   }
1979566063dSJacob Faibussowitsch   PetscCall(SNESGetSolution(snes, &u));
1989566063dSJacob Faibussowitsch   PetscCall(VecViewFromOptions(u, NULL, "-potential_view"));
1995e1f5104SMark   /* Cleanup */
2009566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&u));
2019566063dSJacob Faibussowitsch   PetscCall(SNESDestroy(&snes));
2029566063dSJacob Faibussowitsch   PetscCall(DMDestroy(&dm));
2039566063dSJacob Faibussowitsch   PetscCall(PetscFinalize());
204b122ec5aSJacob Faibussowitsch   return 0;
2055e1f5104SMark }
2065e1f5104SMark 
2075e1f5104SMark /*TEST
2085e1f5104SMark 
2095e1f5104SMark   test:
210f9244615SMatthew G. Knepley     suffix: strong
211f9244615SMatthew G. Knepley     requires: triangle
21286081d6eSMark Adams     args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong
213f9244615SMatthew G. Knepley 
214f9244615SMatthew G. Knepley   test:
2155e1f5104SMark     suffix: bench
2160c569c6eSMark     nsize: 4
2172e776fa0SMark Adams     args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -dm_view -ksp_monitor \
21886081d6eSMark Adams        -benchmark_it 1 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_dim 3 -ksp_converged_reason \
21986081d6eSMark Adams        -ksp_norm_type unpreconditioned -ksp_rtol 1.e-6 -ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 \
22086081d6eSMark Adams        -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev  -mg_levels_pc_type jacobi -pc_gamg_coarse_eq_limit 200 \
22186081d6eSMark Adams        -pc_gamg_coarse_grid_layout_type compact -pc_gamg_esteig_ksp_max_it 5 -pc_gamg_process_eq_limit 200 \
22286081d6eSMark Adams        -pc_gamg_repartition false -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -pc_gamg_threshold_scale .5 \
22386081d6eSMark Adams        -pc_gamg_type agg -pc_type gamg -petscpartitioner_simple_node_grid 1,2,1 -petscpartitioner_simple_process_grid 2,1,1 \
22486081d6eSMark Adams        -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_lag_jacobian -2 -snes_max_it 1 -snes_rtol 1.e-8 -snes_type ksponly -use_gpu_aware_mpi true
2255e1f5104SMark 
22686081d6eSMark Adams   testset:
22786081d6eSMark Adams     nsize: 4
22886081d6eSMark Adams     output_file: output/ex13_comparison.out
22986081d6eSMark Adams     args: -dm_plex_dim 2 -benchmark_it 10 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \
23086081d6eSMark Adams       -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple  \
23186081d6eSMark Adams       -dm_plex_simplex 0 -snes_type ksponly -dm_view -ksp_type cg -pc_type gamg -pc_gamg_process_eq_limit 400 \
23286081d6eSMark Adams       -ksp_norm_type unpreconditioned -ksp_converged_reason
2330c569c6eSMark     test:
23418fb0606SStefano Zampini       suffix: comparison
23518fb0606SStefano Zampini     test:
2360c569c6eSMark       suffix: cuda
2370c569c6eSMark       requires: cuda
23886081d6eSMark Adams       args: -dm_mat_type aijcusparse -dm_vec_type cuda
2390c569c6eSMark     test:
2400c569c6eSMark       suffix: kokkos
241dcfd994dSJunchao Zhang       requires: sycl kokkos_kernels
24286081d6eSMark Adams       args: -dm_mat_type aijkokkos -dm_vec_type kokkos
243aa5a873eSStefano Zampini     test:
244aa5a873eSStefano Zampini       suffix: aijmkl_comp
245c4ad6305SSatish Balay       requires: mkl_sparse
24686081d6eSMark Adams       args: -dm_mat_type aijmkl
247aa5a873eSStefano Zampini 
248aa5a873eSStefano Zampini   test:
249aa5a873eSStefano Zampini     suffix: aijmkl_seq
250aa5a873eSStefano Zampini     nsize: 1
251c4ad6305SSatish Balay     requires: mkl_sparse
252aa5a873eSStefano Zampini     TODO: broken (INDEFINITE PC)
2532e776fa0SMark Adams     args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 \
254bae903cbSmarkadams4           -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \
2552e776fa0SMark Adams           -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned \
25673f7197eSJed Brown           -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard
257aa5a873eSStefano Zampini 
258e6f8f311SMark Adams   testset:
259a22370e2Smarkadams4     requires: cuda amgx
260a22370e2Smarkadams4     filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime"
261e6f8f311SMark Adams     output_file: output/ex13_amgx.out
262e6f8f311SMark Adams     args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \
263a22370e2Smarkadams4           -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false
264e6f8f311SMark Adams     nsize: 4
265e6f8f311SMark Adams     test:
266e6f8f311SMark Adams       suffix: amgx
267e6f8f311SMark Adams       args: -dm_mat_type aijcusparse -dm_vec_type cuda
268e6f8f311SMark Adams     test:
269e6f8f311SMark Adams       suffix: amgx_cpu
270e6f8f311SMark Adams       args: -dm_mat_type aij
271e6f8f311SMark Adams 
2725e1f5104SMark TEST*/
273