15e1f5104SMark static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\ 2f9244615SMatthew G. Knepley We solve the Poisson problem in a rectangular domain\n\ 3f9244615SMatthew G. Knepley using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n"; 45e1f5104SMark 55e1f5104SMark #include <petscdmplex.h> 65e1f5104SMark #include <petscsnes.h> 75e1f5104SMark #include <petscds.h> 85e1f5104SMark #include <petscconvest.h> 9*e6f8f311SMark Adams #if defined(PETSC_HAVE_AMGX) 10*e6f8f311SMark Adams #include <amgx_c.h> 11*e6f8f311SMark Adams #endif 125e1f5104SMark 135e1f5104SMark typedef struct { 14f9244615SMatthew G. Knepley PetscInt nit; /* Number of benchmark iterations */ 15f9244615SMatthew G. Knepley PetscBool strong; /* Do not integrate the Laplacian by parts */ 165e1f5104SMark } AppCtx; 175e1f5104SMark 185e1f5104SMark static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 195e1f5104SMark { 205e1f5104SMark PetscInt d; 215e1f5104SMark *u = 0.0; 225e1f5104SMark for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0*PETSC_PI*x[d]); 235e1f5104SMark return 0; 245e1f5104SMark } 255e1f5104SMark 265e1f5104SMark static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 275e1f5104SMark const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 285e1f5104SMark const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 295e1f5104SMark PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 305e1f5104SMark { 315e1f5104SMark PetscInt d; 325e1f5104SMark for (d = 0; d < dim; ++d) f0[0] += -4.0*PetscSqr(PETSC_PI)*PetscSinReal(2.0*PETSC_PI*x[d]); 335e1f5104SMark } 345e1f5104SMark 355e1f5104SMark static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 365e1f5104SMark const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 375e1f5104SMark const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 385e1f5104SMark PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 395e1f5104SMark { 405e1f5104SMark PetscInt d; 415e1f5104SMark for (d = 0; d < dim; ++d) f1[d] = u_x[d]; 425e1f5104SMark } 435e1f5104SMark 445e1f5104SMark static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, 455e1f5104SMark const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 465e1f5104SMark const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 475e1f5104SMark PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 485e1f5104SMark { 495e1f5104SMark PetscInt d; 505e1f5104SMark for (d = 0; d < dim; ++d) g3[d*dim+d] = 1.0; 515e1f5104SMark } 525e1f5104SMark 53f9244615SMatthew G. Knepley static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 54f9244615SMatthew G. Knepley { 55f9244615SMatthew G. Knepley *u = PetscSqr(x[0]) + PetscSqr(x[1]); 56f9244615SMatthew G. Knepley return 0; 57f9244615SMatthew G. Knepley } 58f9244615SMatthew G. Knepley 59f9244615SMatthew G. Knepley static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 60f9244615SMatthew G. Knepley const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 61f9244615SMatthew G. Knepley const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 62f9244615SMatthew G. Knepley PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 63f9244615SMatthew G. Knepley { 64f9244615SMatthew G. Knepley PetscInt d; 65f9244615SMatthew G. Knepley for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d*dim+d]; 66f9244615SMatthew G. Knepley f0[0] += 4.0; 67f9244615SMatthew G. Knepley } 68f9244615SMatthew G. Knepley 695e1f5104SMark static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) 705e1f5104SMark { 715e1f5104SMark 725e1f5104SMark PetscFunctionBeginUser; 730c569c6eSMark options->nit = 10; 74f9244615SMatthew G. Knepley options->strong = PETSC_FALSE; 75d0609cedSBarry Smith PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX"); 769566063dSJacob Faibussowitsch PetscCall(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL)); 779566063dSJacob Faibussowitsch PetscCall(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL)); 78d0609cedSBarry Smith PetscOptionsEnd(); 795e1f5104SMark PetscFunctionReturn(0); 805e1f5104SMark } 815e1f5104SMark 825e1f5104SMark static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) 835e1f5104SMark { 845e1f5104SMark PetscFunctionBeginUser; 859566063dSJacob Faibussowitsch PetscCall(DMCreate(comm, dm)); 869566063dSJacob Faibussowitsch PetscCall(DMSetType(*dm, DMPLEX)); 879566063dSJacob Faibussowitsch PetscCall(DMSetFromOptions(*dm)); 889566063dSJacob Faibussowitsch PetscCall(DMSetApplicationContext(*dm, user)); 899566063dSJacob Faibussowitsch PetscCall(DMViewFromOptions(*dm, NULL, "-dm_view")); 905e1f5104SMark PetscFunctionReturn(0); 915e1f5104SMark } 925e1f5104SMark 935e1f5104SMark static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user) 945e1f5104SMark { 95f9244615SMatthew G. Knepley PetscDS ds; 9645480ffeSMatthew G. Knepley DMLabel label; 975e1f5104SMark const PetscInt id = 1; 985e1f5104SMark 995e1f5104SMark PetscFunctionBeginUser; 1009566063dSJacob Faibussowitsch PetscCall(DMGetDS(dm, &ds)); 1019566063dSJacob Faibussowitsch PetscCall(DMGetLabel(dm, "marker", &label)); 102f9244615SMatthew G. Knepley if (user->strong) { 1039566063dSJacob Faibussowitsch PetscCall(PetscDSSetResidual(ds, 0, f0_strong_u, NULL)); 1049566063dSJacob Faibussowitsch PetscCall(PetscDSSetExactSolution(ds, 0, quadratic_u, user)); 1059566063dSJacob Faibussowitsch PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) quadratic_u, NULL, user, NULL)); 106f9244615SMatthew G. Knepley } else { 1079566063dSJacob Faibussowitsch PetscCall(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u)); 1089566063dSJacob Faibussowitsch PetscCall(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu)); 1099566063dSJacob Faibussowitsch PetscCall(PetscDSSetExactSolution(ds, 0, trig_u, user)); 1109566063dSJacob Faibussowitsch PetscCall(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) trig_u, NULL, user, NULL)); 111f9244615SMatthew G. Knepley } 1125e1f5104SMark PetscFunctionReturn(0); 1135e1f5104SMark } 1145e1f5104SMark 1155e1f5104SMark static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user) 1165e1f5104SMark { 1175e1f5104SMark DM cdm = dm; 1185e1f5104SMark PetscFE fe; 1195e1f5104SMark DMPolytopeType ct; 1205e1f5104SMark PetscBool simplex; 1215e1f5104SMark PetscInt dim, cStart; 1225e1f5104SMark char prefix[PETSC_MAX_PATH_LEN]; 1235e1f5104SMark 1245e1f5104SMark PetscFunctionBeginUser; 1259566063dSJacob Faibussowitsch PetscCall(DMGetDimension(dm, &dim)); 1269566063dSJacob Faibussowitsch PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, NULL)); 1279566063dSJacob Faibussowitsch PetscCall(DMPlexGetCellType(dm, cStart, &ct)); 1282e776fa0SMark Adams simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct)+1 ? PETSC_TRUE : PETSC_FALSE; // false 1295e1f5104SMark /* Create finite element */ 1309566063dSJacob Faibussowitsch PetscCall(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name)); 1319566063dSJacob Faibussowitsch PetscCall(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe)); 1329566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject) fe, name)); 1335e1f5104SMark /* Set discretization and boundary conditions for each mesh */ 1349566063dSJacob Faibussowitsch PetscCall(DMSetField(dm, 0, NULL, (PetscObject) fe)); 1359566063dSJacob Faibussowitsch PetscCall(DMCreateDS(dm)); 1369566063dSJacob Faibussowitsch PetscCall((*setup)(dm, user)); 1375e1f5104SMark while (cdm) { 1389566063dSJacob Faibussowitsch PetscCall(DMCopyDisc(dm,cdm)); 1395e1f5104SMark /* TODO: Check whether the boundary of coarse meshes is marked */ 1409566063dSJacob Faibussowitsch PetscCall(DMGetCoarseDM(cdm, &cdm)); 1415e1f5104SMark } 1429566063dSJacob Faibussowitsch PetscCall(PetscFEDestroy(&fe)); 1435e1f5104SMark PetscFunctionReturn(0); 1445e1f5104SMark } 1455e1f5104SMark 1465e1f5104SMark int main(int argc, char **argv) 1475e1f5104SMark { 1485e1f5104SMark DM dm; /* Problem specification */ 1495e1f5104SMark SNES snes; /* Nonlinear solver */ 1505e1f5104SMark Vec u; /* Solutions */ 1515e1f5104SMark AppCtx user; /* User-defined work context */ 1522e776fa0SMark Adams PetscLogDouble time; 1532e776fa0SMark Adams Mat Amat; 1545e1f5104SMark 155327415f7SBarry Smith PetscFunctionBeginUser; 1569566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &argv, NULL,help)); 1579566063dSJacob Faibussowitsch PetscCall(ProcessOptions(PETSC_COMM_WORLD, &user)); 1582e776fa0SMark Adams /* system */ 1599566063dSJacob Faibussowitsch PetscCall(SNESCreate(PETSC_COMM_WORLD, &snes)); 1609566063dSJacob Faibussowitsch PetscCall(CreateMesh(PETSC_COMM_WORLD, &user, &dm)); 1619566063dSJacob Faibussowitsch PetscCall(SNESSetDM(snes, dm)); 1629566063dSJacob Faibussowitsch PetscCall(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user)); 1639566063dSJacob Faibussowitsch PetscCall(DMCreateGlobalVector(dm, &u)); 1642e776fa0SMark Adams PetscCall(SNESSetFromOptions(snes)); 1659566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject) u, "potential")); 1669566063dSJacob Faibussowitsch PetscCall(DMPlexSetSNESLocalFEM(dm, &user, &user, &user)); 1679566063dSJacob Faibussowitsch PetscCall(DMSNESCheckFromOptions(snes, u)); 1682e776fa0SMark Adams PetscCall(PetscTime(&time)); 1692e776fa0SMark Adams PetscCall(SNESSetUp(snes)); 170*e6f8f311SMark Adams #if defined(PETSC_HAVE_AMGX) 171*e6f8f311SMark Adams KSP ksp; 172*e6f8f311SMark Adams PC pc; 173*e6f8f311SMark Adams PetscBool flg; 174*e6f8f311SMark Adams AMGX_resources_handle rsc; 175*e6f8f311SMark Adams PetscCall(SNESGetKSP(snes,&ksp)); 176*e6f8f311SMark Adams PetscCall(KSPGetPC(ksp,&pc)); 177*e6f8f311SMark Adams PetscCall(PetscObjectTypeCompare((PetscObject)pc,PCAMGX,&flg)); 178*e6f8f311SMark Adams if (flg) { 179*e6f8f311SMark Adams PetscCall(PCAmgXGetResources(pc,(void*)&rsc)); 180*e6f8f311SMark Adams /* do ... with resource */ 181*e6f8f311SMark Adams } 182*e6f8f311SMark Adams #endif 1832e776fa0SMark Adams PetscCall(SNESGetJacobian(snes, &Amat, NULL, NULL, NULL)); 1842e776fa0SMark Adams PetscCall(MatSetOption(Amat,MAT_SPD,PETSC_TRUE)); 185b94d7dedSBarry Smith PetscCall(MatSetOption(Amat,MAT_SPD_ETERNAL,PETSC_TRUE)); 1869566063dSJacob Faibussowitsch PetscCall(SNESSolve(snes, NULL, u)); 1872e776fa0SMark Adams PetscCall(PetscTimeSubtract(&time)); 1882e776fa0SMark Adams // PetscCall(PetscPrintf(PETSC_COMM_WORLD,"First Solve time: %g\n",-time)); 1895e1f5104SMark /* Benchmark system */ 1900c569c6eSMark if (user.nit) { 1915e1f5104SMark Vec b; 1920c569c6eSMark PetscInt i; 1932e776fa0SMark Adams #if defined(PETSC_USE_LOG) 1942e776fa0SMark Adams PetscLogStage kspstage; 1952e776fa0SMark Adams #endif 1962e776fa0SMark Adams PetscCall(PetscLogStageRegister("Solve only", &kspstage)); 1979566063dSJacob Faibussowitsch PetscCall(PetscLogStagePush(kspstage)); 1982e776fa0SMark Adams PetscCall(SNESGetSolution(snes, &u)); 1992e776fa0SMark Adams PetscCall(SNESGetFunction(snes, &b, NULL, NULL)); 2000c569c6eSMark for (i=0;i<user.nit;i++) { 2019566063dSJacob Faibussowitsch PetscCall(VecZeroEntries(u)); 2022e776fa0SMark Adams PetscCall(SNESSolve(snes, NULL, u)); 2030c569c6eSMark } 2049566063dSJacob Faibussowitsch PetscCall(PetscLogStagePop()); 2055e1f5104SMark } 2069566063dSJacob Faibussowitsch PetscCall(SNESGetSolution(snes, &u)); 2079566063dSJacob Faibussowitsch PetscCall(VecViewFromOptions(u, NULL, "-potential_view")); 2085e1f5104SMark /* Cleanup */ 2099566063dSJacob Faibussowitsch PetscCall(VecDestroy(&u)); 2109566063dSJacob Faibussowitsch PetscCall(SNESDestroy(&snes)); 2119566063dSJacob Faibussowitsch PetscCall(DMDestroy(&dm)); 2129566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 213b122ec5aSJacob Faibussowitsch return 0; 2145e1f5104SMark } 2155e1f5104SMark 2165e1f5104SMark /*TEST 2175e1f5104SMark 2185e1f5104SMark test: 219f9244615SMatthew G. Knepley suffix: strong 220f9244615SMatthew G. Knepley requires: triangle 22186081d6eSMark Adams args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong 222f9244615SMatthew G. Knepley 223f9244615SMatthew G. Knepley test: 2245e1f5104SMark suffix: bench 2250c569c6eSMark nsize: 4 2262e776fa0SMark Adams args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,1 -dm_refine 2 -dm_view -ksp_monitor \ 22786081d6eSMark Adams -benchmark_it 1 -dm_plex_box_upper 2,2,1 -dm_plex_box_lower 0,0,0 -dm_plex_dim 3 -ksp_converged_reason \ 22886081d6eSMark Adams -ksp_norm_type unpreconditioned -ksp_rtol 1.e-6 -ksp_type cg -mg_levels_ksp_chebyshev_esteig 0,0.2,0,1.1 \ 22986081d6eSMark Adams -mg_levels_ksp_max_it 1 -mg_levels_ksp_type chebyshev -mg_levels_pc_type jacobi -pc_gamg_coarse_eq_limit 200 \ 23086081d6eSMark Adams -pc_gamg_coarse_grid_layout_type compact -pc_gamg_esteig_ksp_max_it 5 -pc_gamg_process_eq_limit 200 \ 23186081d6eSMark Adams -pc_gamg_repartition false -pc_gamg_reuse_interpolation true -pc_gamg_aggressive_coarsening 0 -pc_gamg_threshold 0.001 -pc_gamg_threshold_scale .5 \ 23286081d6eSMark Adams -pc_gamg_type agg -pc_type gamg -petscpartitioner_simple_node_grid 1,2,1 -petscpartitioner_simple_process_grid 2,1,1 \ 23386081d6eSMark Adams -petscpartitioner_type simple -potential_petscspace_degree 2 -snes_lag_jacobian -2 -snes_max_it 1 -snes_rtol 1.e-8 -snes_type ksponly -use_gpu_aware_mpi true 2345e1f5104SMark 23586081d6eSMark Adams testset: 23686081d6eSMark Adams nsize: 4 23786081d6eSMark Adams output_file: output/ex13_comparison.out 23886081d6eSMark Adams args: -dm_plex_dim 2 -benchmark_it 10 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 23986081d6eSMark Adams -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \ 24086081d6eSMark Adams -dm_plex_simplex 0 -snes_type ksponly -dm_view -ksp_type cg -pc_type gamg -pc_gamg_process_eq_limit 400 \ 24186081d6eSMark Adams -ksp_norm_type unpreconditioned -ksp_converged_reason 2420c569c6eSMark test: 24318fb0606SStefano Zampini suffix: comparison 24418fb0606SStefano Zampini test: 2450c569c6eSMark suffix: cuda 2460c569c6eSMark requires: cuda 24786081d6eSMark Adams args: -dm_mat_type aijcusparse -dm_vec_type cuda 2480c569c6eSMark test: 2490c569c6eSMark suffix: kokkos 25086081d6eSMark Adams requires: !sycl kokkos_kernels 25186081d6eSMark Adams args: -dm_mat_type aijkokkos -dm_vec_type kokkos 252aa5a873eSStefano Zampini test: 253aa5a873eSStefano Zampini suffix: aijmkl_comp 254c4ad6305SSatish Balay requires: mkl_sparse 25586081d6eSMark Adams args: -dm_mat_type aijmkl 256aa5a873eSStefano Zampini 257aa5a873eSStefano Zampini test: 258aa5a873eSStefano Zampini suffix: aijmkl_seq 259aa5a873eSStefano Zampini nsize: 1 260c4ad6305SSatish Balay requires: mkl_sparse 261aa5a873eSStefano Zampini TODO: broken (INDEFINITE PC) 2622e776fa0SMark Adams args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 \ 263bae903cbSmarkadams4 -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \ 2642e776fa0SMark Adams -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned \ 26573f7197eSJed Brown -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard 266aa5a873eSStefano Zampini 267*e6f8f311SMark Adams testset: 268*e6f8f311SMark Adams requires: amgx 269*e6f8f311SMark Adams output_file: output/ex13_amgx.out 270*e6f8f311SMark Adams args: -dm_plex_dim 2 -dm_plex_box_faces 2,2 -dm_refine 2 -petscpartitioner_type simple -potential_petscspace_degree 2 -dm_plex_simplex 0 -ksp_monitor \ 271*e6f8f311SMark Adams -snes_type ksponly -dm_view -ksp_type cg -ksp_norm_type unpreconditioned -ksp_converged_reason -snes_rtol 1.e-4 -pc_type amgx -benchmark_it 1 -pc_amgx_verbose true 272*e6f8f311SMark Adams nsize: 4 273*e6f8f311SMark Adams test: 274*e6f8f311SMark Adams suffix: amgx 275*e6f8f311SMark Adams args: -dm_mat_type aijcusparse -dm_vec_type cuda 276*e6f8f311SMark Adams test: 277*e6f8f311SMark Adams suffix: amgx_cpu 278*e6f8f311SMark Adams args: -dm_mat_type aij 279*e6f8f311SMark Adams 2805e1f5104SMark TEST*/ 281