15e1f5104SMark static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\ 2f9244615SMatthew G. Knepley We solve the Poisson problem in a rectangular domain\n\ 3f9244615SMatthew G. Knepley using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n"; 45e1f5104SMark 55e1f5104SMark #include <petscdmplex.h> 65e1f5104SMark #include <petscsnes.h> 75e1f5104SMark #include <petscds.h> 85e1f5104SMark #include <petscconvest.h> 9e6f8f311SMark Adams #if defined(PETSC_HAVE_AMGX) 10e6f8f311SMark Adams #include <amgx_c.h> 11e6f8f311SMark Adams #endif 125e1f5104SMark 135e1f5104SMark typedef struct { 14f9244615SMatthew G. Knepley PetscInt nit; /* Number of benchmark iterations */ 15f9244615SMatthew G. Knepley PetscBool strong; /* Do not integrate the Laplacian by parts */ 165e1f5104SMark } AppCtx; 175e1f5104SMark 18d71ae5a4SJacob Faibussowitsch static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 19d71ae5a4SJacob Faibussowitsch { 205e1f5104SMark PetscInt d; 215e1f5104SMark *u = 0.0; 225e1f5104SMark for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0 * PETSC_PI * x[d]); 233ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 245e1f5104SMark } 255e1f5104SMark 26d71ae5a4SJacob Faibussowitsch static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 27d71ae5a4SJacob Faibussowitsch { 285e1f5104SMark PetscInt d; 295e1f5104SMark for (d = 0; d < dim; ++d) f0[0] += -4.0 * PetscSqr(PETSC_PI) * PetscSinReal(2.0 * PETSC_PI * x[d]); 305e1f5104SMark } 315e1f5104SMark 32d71ae5a4SJacob Faibussowitsch static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 33d71ae5a4SJacob Faibussowitsch { 345e1f5104SMark PetscInt d; 355e1f5104SMark for (d = 0; d < dim; ++d) f1[d] = u_x[d]; 365e1f5104SMark } 375e1f5104SMark 38d71ae5a4SJacob Faibussowitsch static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 39d71ae5a4SJacob Faibussowitsch { 405e1f5104SMark PetscInt d; 415e1f5104SMark for (d = 0; d < dim; ++d) g3[d * dim + d] = 1.0; 425e1f5104SMark } 435e1f5104SMark 44d71ae5a4SJacob Faibussowitsch static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 45d71ae5a4SJacob Faibussowitsch { 46f9244615SMatthew G. Knepley *u = PetscSqr(x[0]) + PetscSqr(x[1]); 473ba16761SJacob Faibussowitsch return PETSC_SUCCESS; 48f9244615SMatthew G. Knepley } 49f9244615SMatthew G. Knepley 50d71ae5a4SJacob Faibussowitsch static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 51d71ae5a4SJacob Faibussowitsch { 52f9244615SMatthew G. Knepley PetscInt d; 53f9244615SMatthew G. Knepley for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d * dim + d]; 54f9244615SMatthew G. Knepley f0[0] += 4.0; 55f9244615SMatthew G. Knepley } 56f9244615SMatthew G. Knepley 57d71ae5a4SJacob Faibussowitsch static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) 58d71ae5a4SJacob Faibussowitsch { 595e1f5104SMark PetscFunctionBeginUser; 600c569c6eSMark options->nit = 10; 61f9244615SMatthew G. Knepley options->strong = PETSC_FALSE; 62d0609cedSBarry Smith PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX"); 639566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL)); 649566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL)); 65d0609cedSBarry Smith PetscOptionsEnd(); 663ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 675e1f5104SMark } 685e1f5104SMark 69d71ae5a4SJacob Faibussowitsch static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) 70d71ae5a4SJacob Faibussowitsch { 715e1f5104SMark PetscFunctionBeginUser; 729566063dSJacob Faibussowitsch PetscCall(DMCreate(comm, dm)); 739566063dSJacob Faibussowitsch PetscCall(DMSetType(*dm, DMPLEX)); 749566063dSJacob Faibussowitsch PetscCall(DMSetFromOptions(*dm)); 759566063dSJacob Faibussowitsch PetscCall(DMSetApplicationContext(*dm, user)); 769566063dSJacob Faibussowitsch PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view")); 7724def88cSMark Adams { // perturb to get general coordinates 7824def88cSMark Adams Vec coordinates; 7924def88cSMark Adams PetscScalar *coords; 8024def88cSMark Adams PetscInt nloc, v; 8124def88cSMark Adams PetscRandom rnd; 8224def88cSMark Adams PetscReal del; 8324def88cSMark Adams PetscCall(PetscRandomCreate(PETSC_COMM_SELF, &rnd)); 8424def88cSMark Adams PetscCall(PetscRandomSetInterval(rnd, -PETSC_SQRT_MACHINE_EPSILON, PETSC_SQRT_MACHINE_EPSILON)); 8524def88cSMark Adams PetscCall(PetscRandomSetFromOptions(rnd)); 8624def88cSMark Adams PetscCall(DMGetCoordinatesLocal(*dm, &coordinates)); 8724def88cSMark Adams PetscCall(VecGetArray(coordinates, &coords)); 8824def88cSMark Adams PetscCall(VecGetLocalSize(coordinates, &nloc)); 8924def88cSMark Adams for (v = 0; v < nloc; ++v) { 9024def88cSMark Adams PetscCall(PetscRandomGetValueReal(rnd, &del)); 9124def88cSMark Adams coords[v] += del * coords[v]; 9224def88cSMark Adams } 9324def88cSMark Adams PetscCall(VecRestoreArray(coordinates, &coords)); 9424def88cSMark Adams PetscCall(PetscRandomDestroy(&rnd)); 9524def88cSMark Adams } 963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 975e1f5104SMark } 985e1f5104SMark 99d71ae5a4SJacob Faibussowitsch static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user) 100d71ae5a4SJacob Faibussowitsch { 101f9244615SMatthew G. Knepley PetscDS ds; 10245480ffeSMatthew G. Knepley DMLabel label; 1035e1f5104SMark const PetscInt id = 1; 1045e1f5104SMark 1055e1f5104SMark PetscFunctionBeginUser; 1069566063dSJacob Faibussowitsch PetscCall(DMGetDS(dm, &ds)); 1079566063dSJacob Faibussowitsch PetscCall(DMGetLabel(dm, "marker", &label)); 108f9244615SMatthew G. Knepley if (user->strong) { 1099566063dSJacob Faibussowitsch PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL)); 1109566063dSJacob Faibussowitsch PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user)); 111*5ebfa9e9SBarry Smith PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (PetscFortranCallbackFn *)quadratic_u, NULL, user, NULL)); 112f9244615SMatthew G. Knepley } else { 1139566063dSJacob Faibussowitsch PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u)); 1149566063dSJacob Faibussowitsch PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu)); 1159566063dSJacob Faibussowitsch PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user)); 11657d50842SBarry Smith PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (PetscVoidFn *)trig_u, NULL, user, NULL)); 117f9244615SMatthew G. Knepley } 1183ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1195e1f5104SMark } 1205e1f5104SMark 121d71ae5a4SJacob Faibussowitsch static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user) 122d71ae5a4SJacob Faibussowitsch { 1235e1f5104SMark DM cdm = dm; 1245e1f5104SMark PetscFE fe; 1255e1f5104SMark DMPolytopeType ct; 1265e1f5104SMark PetscBool simplex; 1275e1f5104SMark PetscInt dim, cStart; 1285e1f5104SMark char prefix[PETSC_MAX_PATH_LEN]; 1295e1f5104SMark 1305e1f5104SMark PetscFunctionBeginUser; 1319566063dSJacob Faibussowitsch PetscCall(DMGetDimension(dm, &dim)); 1329566063dSJacob Faibussowitsch PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL)); 1339566063dSJacob Faibussowitsch PetscCall(DMPlexGetCellType(dm, cStart, &ct)); 1342e776fa0SMark Adams simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct) + 1 ? PETSC_TRUE : PETSC_FALSE; // false 1355e1f5104SMark /* Create finite element */ 1369566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name)); 1379566063dSJacob Faibussowitsch PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe)); 1389566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)fe, name)); 1395e1f5104SMark /* Set discretization and boundary conditions for each mesh */ 1409566063dSJacob Faibussowitsch PetscCall(DMSetField(dm, 0, NULL, (PetscObject)fe)); 1419566063dSJacob Faibussowitsch PetscCall(DMCreateDS(dm)); 1429566063dSJacob Faibussowitsch PetscCall((*setup)(dm, user)); 1435e1f5104SMark while (cdm) { 1449566063dSJacob Faibussowitsch PetscCall(DMCopyDisc(dm, cdm)); 1455e1f5104SMark /* TODO: Check whether the boundary of coarse meshes is marked */ 1469566063dSJacob Faibussowitsch PetscCall(DMGetCoarseDM(cdm, &cdm)); 1475e1f5104SMark } 1489566063dSJacob Faibussowitsch PetscCall(PetscFEDestroy(&fe)); 1493ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 1505e1f5104SMark } 1515e1f5104SMark 152d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv) 153d71ae5a4SJacob Faibussowitsch { 1545e1f5104SMark DM dm; /* Problem specification */ 1555e1f5104SMark SNES snes; /* Nonlinear solver */ 1565e1f5104SMark Vec u; /* Solutions */ 1575e1f5104SMark AppCtx user; /* User-defined work context */ 1582e776fa0SMark Adams PetscLogDouble time; 1592e776fa0SMark Adams Mat Amat; 1605e1f5104SMark 161327415f7SBarry Smith PetscFunctionBeginUser; 1629566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 1639566063dSJacob Faibussowitsch PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user)); 1642e776fa0SMark Adams /* system */ 1659566063dSJacob Faibussowitsch PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 1669566063dSJacob Faibussowitsch PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm)); 1679566063dSJacob Faibussowitsch PetscCall(SNESSetDM(snes, dm)); 1689566063dSJacob Faibussowitsch PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user)); 1699566063dSJacob Faibussowitsch PetscCall(DMCreateGlobalVector(dm, &u)); 170e0b20f2aSMark Adams { 171e0b20f2aSMark Adams PetscInt N; 172e0b20f2aSMark Adams PetscCall(VecGetSize(u, &N)); 173e0b20f2aSMark Adams PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Number equations N = %" PetscInt_FMT "\n", N)); 174e0b20f2aSMark Adams } 1752e776fa0SMark Adams PetscCall(SNESSetFromOptions(snes)); 1769566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)u, "potential")); 1776493148fSStefano Zampini PetscCall(DMPlexSetSNESLocalFEM(dm, PETSC_FALSE, &user)); 1789566063dSJacob Faibussowitsch PetscCall(DMSNESCheckFromOptions(snes, u)); 1792e776fa0SMark Adams PetscCall(PetscTime(&time)); 1802e776fa0SMark Adams PetscCall(SNESSetUp(snes)); 181e6f8f311SMark Adams #if defined(PETSC_HAVE_AMGX) 182e6f8f311SMark Adams KSP ksp; 183e6f8f311SMark Adams PC pc; 184e6f8f311SMark Adams PetscBool flg; 185e6f8f311SMark Adams AMGX_resources_handle rsc; 186e6f8f311SMark Adams PetscCall(SNESGetKSP(snes, &ksp)); 187e6f8f311SMark Adams PetscCall(KSPGetPC(ksp, &pc)); 188e6f8f311SMark Adams PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCAMGX, &flg)); 189e6f8f311SMark Adams if (flg) { 190e6f8f311SMark Adams PetscCall(PCAmgXGetResources(pc, (void *)&rsc)); 191e6f8f311SMark Adams /* do ... with resource */ 192e6f8f311SMark Adams } 193e6f8f311SMark Adams #endif 1942e776fa0SMark Adams PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL)); 1952e776fa0SMark Adams PetscCall(MatSetOption(Amat, MAT_SPD, PETSC_TRUE)); 196b94d7dedSBarry Smith PetscCall(MatSetOption(Amat, MAT_SPD_ETERNAL, PETSC_TRUE)); 1979566063dSJacob Faibussowitsch PetscCall(SNESSolve(snes, NULL, u)); 1982e776fa0SMark Adams PetscCall(PetscTimeSubtract(&time)); 1995e1f5104SMark /* Benchmark system */ 2000c569c6eSMark if (user.nit) { 2015e1f5104SMark Vec b; 2020c569c6eSMark PetscInt i; 2032e776fa0SMark Adams PetscLogStage kspstage; 2042e776fa0SMark Adams PetscCall(PetscLogStageRegister("Solve only", &kspstage)); 2059566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(kspstage)); 2062e776fa0SMark Adams PetscCall(SNESGetSolution(snes, &u)); 2072e776fa0SMark Adams PetscCall(SNESGetFunction(snes, &b, NULL, NULL)); 2080c569c6eSMark for (i = 0; i < user.nit; i++) { 2099566063dSJacob Faibussowitsch PetscCall(VecZeroEntries(u)); 2102e776fa0SMark Adams PetscCall(SNESSolve(snes, NULL, u)); 2110c569c6eSMark } 2129566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 2135e1f5104SMark } 2149566063dSJacob Faibussowitsch PetscCall(SNESGetSolution(snes, &u)); 2159566063dSJacob Faibussowitsch PetscCall(VecViewFromOptions(u, NULL, "-potential_view")); 2165e1f5104SMark /* Cleanup */ 2179566063dSJacob Faibussowitsch PetscCall(VecDestroy(&u)); 2189566063dSJacob Faibussowitsch PetscCall(SNESDestroy(&snes)); 2199566063dSJacob Faibussowitsch PetscCall(DMDestroy(&dm)); 2209566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 221b122ec5aSJacob Faibussowitsch return 0; 2225e1f5104SMark } 2235e1f5104SMark 2245e1f5104SMark /*TEST 2255e1f5104SMark 2265e1f5104SMark test: 227f9244615SMatthew G. Knepley suffix: strong 228f9244615SMatthew G. Knepley requires: triangle 229cc2bab21SMatthew G. Knepley args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong -pc_type jacobi 230f9244615SMatthew G. Knepley 23186081d6eSMark Adams testset: 23286081d6eSMark Adams nsize: 4 23386081d6eSMark Adams output_file: output/ex13_comparison.out 234e923c352SMark Adams args: -dm_plex_dim 3 -benchmark_it 2 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -petscpartitioner_simple_node_grid 1,1,1 -petscpartitioner_simple_process_grid 2,2,1 -potential_petscspace_degree 2 -petscpartitioner_type simple -snes_type ksponly -dm_view -ksp_type cg -ksp_rtol 1e-12 -snes_lag_jacobian -2 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -pc_type gamg -pc_gamg_process_eq_limit 200 -pc_gamg_coarse_eq_limit 1000 -pc_gamg_esteig_ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.05 -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_square_graph true -pc_gamg_threshold 0.04 -pc_gamg_threshold_scale .25 -pc_gamg_aggressive_coarsening 2 -pc_gamg_mis_k_minimum_degree_ordering true -ksp_monitor -ksp_norm_type unpreconditioned 2350c569c6eSMark test: 23618fb0606SStefano Zampini suffix: comparison 23718fb0606SStefano Zampini test: 2380c569c6eSMark suffix: cuda 2390c569c6eSMark requires: cuda 24086081d6eSMark Adams args: -dm_mat_type aijcusparse -dm_vec_type cuda 2410c569c6eSMark test: 2420c569c6eSMark suffix: kokkos 243e923c352SMark Adams requires: kokkos_kernels 244e923c352SMark Adams args: -dm_mat_type aijkokkos -dm_vec_type kokkos 245e923c352SMark Adams test: 246e923c352SMark Adams suffix: kokkos_sycl 247dcfd994dSJunchao Zhang requires: sycl kokkos_kernels 24886081d6eSMark Adams args: -dm_mat_type aijkokkos -dm_vec_type kokkos 249aa5a873eSStefano Zampini test: 250aa5a873eSStefano Zampini suffix: aijmkl_comp 251c4ad6305SSatish Balay requires: mkl_sparse 25286081d6eSMark Adams args: -dm_mat_type aijmkl 253aa5a873eSStefano Zampini 254e6f8f311SMark Adams testset: 255a22370e2Smarkadams4 requires: cuda amgx 256a22370e2Smarkadams4 filter: grep -v Built | grep -v "AMGX version" | grep -v "CUDA Runtime" 257e6f8f311SMark Adams output_file: output/ex13_amgx.out 258e6f8f311SMark Adams args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \ 259a22370e2Smarkadams4 -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose false 260e6f8f311SMark Adams nsize: 4 261e6f8f311SMark Adams test: 262e6f8f311SMark Adams suffix: amgx 263e6f8f311SMark Adams args: -dm_mat_type aijcusparse -dm_vec_type cuda 264e6f8f311SMark Adams test: 265e6f8f311SMark Adams suffix: amgx_cpu 266e6f8f311SMark Adams args: -dm_mat_type aij 267e6f8f311SMark Adams 2685e1f5104SMark TEST*/ 269