1 static char help[] = "Benchmark Poisson Problem in 2d and 3d with finite elements.\n\ 2 We solve the Poisson problem in a rectangular domain\n\ 3 using a parallel unstructured mesh (DMPLEX) to discretize it.\n\n\n"; 4 5 #include <petscdmplex.h> 6 #include <petscsnes.h> 7 #include <petscds.h> 8 #include <petscconvest.h> 9 10 typedef struct { 11 PetscInt nit; /* Number of benchmark iterations */ 12 PetscBool strong; /* Do not integrate the Laplacian by parts */ 13 } AppCtx; 14 15 static PetscErrorCode trig_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 16 { 17 PetscInt d; 18 *u = 0.0; 19 for (d = 0; d < dim; ++d) *u += PetscSinReal(2.0*PETSC_PI*x[d]); 20 return 0; 21 } 22 23 static void f0_trig_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 24 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 25 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 26 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 27 { 28 PetscInt d; 29 for (d = 0; d < dim; ++d) f0[0] += -4.0*PetscSqr(PETSC_PI)*PetscSinReal(2.0*PETSC_PI*x[d]); 30 } 31 32 static void f1_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 33 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 34 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 35 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f1[]) 36 { 37 PetscInt d; 38 for (d = 0; d < dim; ++d) f1[d] = u_x[d]; 39 } 40 41 static void g3_uu(PetscInt dim, PetscInt Nf, PetscInt NfAux, 42 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 43 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 44 PetscReal t, PetscReal u_tShift, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar g3[]) 45 { 46 PetscInt d; 47 for (d = 0; d < dim; ++d) g3[d*dim+d] = 1.0; 48 } 49 50 static PetscErrorCode quadratic_u(PetscInt dim, PetscReal time, const PetscReal x[], PetscInt Nc, PetscScalar *u, void *ctx) 51 { 52 *u = PetscSqr(x[0]) + PetscSqr(x[1]); 53 return 0; 54 } 55 56 static void f0_strong_u(PetscInt dim, PetscInt Nf, PetscInt NfAux, 57 const PetscInt uOff[], const PetscInt uOff_x[], const PetscScalar u[], const PetscScalar u_t[], const PetscScalar u_x[], 58 const PetscInt aOff[], const PetscInt aOff_x[], const PetscScalar a[], const PetscScalar a_t[], const PetscScalar a_x[], 59 PetscReal t, const PetscReal x[], PetscInt numConstants, const PetscScalar constants[], PetscScalar f0[]) 60 { 61 PetscInt d; 62 for (d = 0; d < dim; ++d) f0[0] -= u_x[dim + d*dim+d]; 63 f0[0] += 4.0; 64 } 65 66 static PetscErrorCode ProcessOptions(MPI_Comm comm, AppCtx *options) 67 { 68 PetscErrorCode ierr; 69 70 PetscFunctionBeginUser; 71 options->nit = 10; 72 options->strong = PETSC_FALSE; 73 ierr = PetscOptionsBegin(comm, "", "Poisson Problem Options", "DMPLEX");CHKERRQ(ierr); 74 CHKERRQ(PetscOptionsInt("-benchmark_it", "Solve the benchmark problem this many times", "ex13.c", options->nit, &options->nit, NULL)); 75 CHKERRQ(PetscOptionsBool("-strong", "Do not integrate the Laplacian by parts", "ex13.c", options->strong, &options->strong, NULL)); 76 ierr = PetscOptionsEnd();CHKERRQ(ierr); 77 PetscFunctionReturn(0); 78 } 79 80 static PetscErrorCode CreateMesh(MPI_Comm comm, AppCtx *user, DM *dm) 81 { 82 PetscFunctionBeginUser; 83 CHKERRQ(DMCreate(comm, dm)); 84 CHKERRQ(DMSetType(*dm, DMPLEX)); 85 CHKERRQ(DMSetFromOptions(*dm)); 86 CHKERRQ(DMSetApplicationContext(*dm, user)); 87 CHKERRQ(DMViewFromOptions(*dm, NULL, "-dm_view")); 88 PetscFunctionReturn(0); 89 } 90 91 static PetscErrorCode SetupPrimalProblem(DM dm, AppCtx *user) 92 { 93 PetscDS ds; 94 DMLabel label; 95 const PetscInt id = 1; 96 97 PetscFunctionBeginUser; 98 CHKERRQ(DMGetDS(dm, &ds)); 99 CHKERRQ(DMGetLabel(dm, "marker", &label)); 100 if (user->strong) { 101 CHKERRQ(PetscDSSetResidual(ds, 0, f0_strong_u, NULL)); 102 CHKERRQ(PetscDSSetExactSolution(ds, 0, quadratic_u, user)); 103 CHKERRQ(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) quadratic_u, NULL, user, NULL)); 104 } else { 105 CHKERRQ(PetscDSSetResidual(ds, 0, f0_trig_u, f1_u)); 106 CHKERRQ(PetscDSSetJacobian(ds, 0, 0, NULL, NULL, NULL, g3_uu)); 107 CHKERRQ(PetscDSSetExactSolution(ds, 0, trig_u, user)); 108 CHKERRQ(DMAddBoundary(dm, DM_BC_ESSENTIAL, "wall", label, 1, &id, 0, 0, NULL, (void (*)(void)) trig_u, NULL, user, NULL)); 109 } 110 PetscFunctionReturn(0); 111 } 112 113 static PetscErrorCode SetupDiscretization(DM dm, const char name[], PetscErrorCode (*setup)(DM, AppCtx *), AppCtx *user) 114 { 115 DM cdm = dm; 116 PetscFE fe; 117 DMPolytopeType ct; 118 PetscBool simplex; 119 PetscInt dim, cStart; 120 char prefix[PETSC_MAX_PATH_LEN]; 121 122 PetscFunctionBeginUser; 123 CHKERRQ(DMGetDimension(dm, &dim)); 124 CHKERRQ(DMPlexGetHeightStratum(dm, 0, &cStart, NULL)); 125 CHKERRQ(DMPlexGetCellType(dm, cStart, &ct)); 126 simplex = DMPolytopeTypeGetNumVertices(ct) == DMPolytopeTypeGetDim(ct)+1 ? PETSC_TRUE : PETSC_FALSE; 127 /* Create finite element */ 128 CHKERRQ(PetscSNPrintf(prefix, PETSC_MAX_PATH_LEN, "%s_", name)); 129 CHKERRQ(PetscFECreateDefault(PETSC_COMM_SELF, dim, 1, simplex, name ? prefix : NULL, -1, &fe)); 130 CHKERRQ(PetscObjectSetName((PetscObject) fe, name)); 131 /* Set discretization and boundary conditions for each mesh */ 132 CHKERRQ(DMSetField(dm, 0, NULL, (PetscObject) fe)); 133 CHKERRQ(DMCreateDS(dm)); 134 CHKERRQ((*setup)(dm, user)); 135 while (cdm) { 136 CHKERRQ(DMCopyDisc(dm,cdm)); 137 /* TODO: Check whether the boundary of coarse meshes is marked */ 138 CHKERRQ(DMGetCoarseDM(cdm, &cdm)); 139 } 140 CHKERRQ(PetscFEDestroy(&fe)); 141 PetscFunctionReturn(0); 142 } 143 144 int main(int argc, char **argv) 145 { 146 DM dm; /* Problem specification */ 147 SNES snes; /* Nonlinear solver */ 148 Vec u; /* Solutions */ 149 AppCtx user; /* User-defined work context */ 150 PetscErrorCode ierr; 151 152 ierr = PetscInitialize(&argc, &argv, NULL,help);if (ierr) return ierr; 153 CHKERRQ(ProcessOptions(PETSC_COMM_WORLD, &user)); 154 /* Primal system */ 155 CHKERRQ(SNESCreate(PETSC_COMM_WORLD, &snes)); 156 CHKERRQ(CreateMesh(PETSC_COMM_WORLD, &user, &dm)); 157 CHKERRQ(SNESSetDM(snes, dm)); 158 CHKERRQ(SetupDiscretization(dm, "potential", SetupPrimalProblem, &user)); 159 CHKERRQ(DMCreateGlobalVector(dm, &u)); 160 CHKERRQ(VecSet(u, 0.0)); 161 CHKERRQ(PetscObjectSetName((PetscObject) u, "potential")); 162 CHKERRQ(DMPlexSetSNESLocalFEM(dm, &user, &user, &user)); 163 CHKERRQ(SNESSetFromOptions(snes)); 164 CHKERRQ(DMSNESCheckFromOptions(snes, u)); 165 CHKERRQ(SNESSolve(snes, NULL, u)); 166 /* Benchmark system */ 167 if (user.nit) { 168 #if defined(PETSC_USE_LOG) 169 PetscLogStage kspstage,pcstage; 170 #endif 171 KSP ksp; 172 PC pc; 173 Vec b; 174 PetscInt i; 175 PetscLogDouble time; 176 CHKERRQ(PetscOptionsClearValue(NULL,"-ksp_monitor")); 177 CHKERRQ(PetscOptionsClearValue(NULL,"-ksp_view")); 178 CHKERRQ(SNESGetKSP(snes, &ksp)); 179 CHKERRQ(SNESGetSolution(snes, &u)); 180 CHKERRQ(KSPSetFromOptions(ksp)); 181 CHKERRQ(VecSet(u, 0.0)); 182 CHKERRQ(SNESGetFunction(snes, &b, NULL, NULL)); 183 CHKERRQ(KSPGetPC(ksp, &pc)); 184 CHKERRQ(PetscLogStageRegister("PCSetUp", &pcstage)); 185 CHKERRQ(PetscLogStagePush(pcstage)); 186 CHKERRQ(PCSetUp(pc)); 187 CHKERRQ(PetscLogStagePop()); 188 CHKERRQ(PetscLogStageRegister("KSP Solve only", &kspstage)); 189 CHKERRQ(PetscTime(&time)); 190 CHKERRQ(PetscLogStagePush(kspstage)); 191 for (i=0;i<user.nit;i++) { 192 CHKERRQ(VecZeroEntries(u)); 193 CHKERRQ(KSPSolve(ksp, b, u)); 194 } 195 CHKERRQ(PetscLogStagePop()); 196 CHKERRQ(PetscTimeSubtract(&time)); 197 // ierr = PetscPrintf(PETSC_COMM_WORLD,"Solve time: %g\n",-time); // breaks CI 198 } 199 CHKERRQ(SNESGetSolution(snes, &u)); 200 CHKERRQ(VecViewFromOptions(u, NULL, "-potential_view")); 201 /* Cleanup */ 202 CHKERRQ(VecDestroy(&u)); 203 CHKERRQ(SNESDestroy(&snes)); 204 CHKERRQ(DMDestroy(&dm)); 205 ierr = PetscFinalize(); 206 return ierr; 207 } 208 209 /*TEST 210 211 test: 212 suffix: strong 213 requires: triangle 214 args: -dm_plex_dim 2 -dm_refine 1 -benchmark_it 0 -dmsnes_check \ 215 -potential_petscspace_degree 2 -dm_ds_jet_degree 2 -strong 216 217 test: 218 suffix: bench 219 nsize: 4 220 args: -dm_plex_dim 3 -dm_plex_simplex 0 -dm_plex_box_faces 2,2,8 -dm_refine 1 \ 221 -petscpartitioner_type simple -petscpartitioner_simple_process_grid 1,1,2 -petscpartitioner_simple_node_grid 1,1,2 \ 222 -potential_petscspace_degree 2 -ksp_type cg -pc_type gamg -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -benchmark_it 1 -dm_view -snes_rtol 1.e-4 223 224 test: 225 suffix: comparison 226 nsize: 4 227 args: -dm_plex_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 228 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \ 229 -dm_plex_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 230 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 231 232 test: 233 suffix: cuda 234 nsize: 4 235 requires: cuda 236 output_file: output/ex13_comparison.out 237 args: -dm_plex_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 238 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \ 239 -dm_plex_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 240 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijcusparse -dm_vec_type cuda 241 242 test: 243 suffix: kokkos_comp 244 nsize: 4 245 requires: !sycl kokkos_kernels 246 output_file: output/ex13_comparison.out 247 args: -dm_plex_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 248 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \ 249 -dm_plex_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 250 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijkokkos -dm_vec_type kokkos 251 252 test: 253 nsize: 4 254 requires: !sycl kokkos_kernels 255 suffix: kokkos 256 args: -dm_plex_dim 2 -dm_plex_box_faces 2,8 -petscpartitioner_type simple -petscpartitioner_simple_process_grid 2,1 \ 257 -petscpartitioner_simple_node_grid 2,1 -dm_plex_simplex 0 -potential_petscspace_degree 1 -dm_refine 1 -ksp_type cg -pc_type gamg -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -ksp_norm_type unpreconditioned \ 258 -pc_gamg_esteig_ksp_type cg -ksp_converged_reason -snes_monitor_short -snes_rtol 1.e-4 -dm_view -dm_mat_type aijkokkos -dm_vec_type kokkos 259 260 test: 261 suffix: aijmkl_comp 262 nsize: 4 263 requires: mkl_sparse 264 output_file: output/ex13_comparison.out 265 args: -dm_plex_dim 2 -dm_plex_box_faces 4,4 -dm_refine 3 -petscpartitioner_simple_process_grid 2,2 \ 266 -petscpartitioner_simple_node_grid 1,1 -potential_petscspace_degree 2 -petscpartitioner_type simple \ 267 -dm_plex_simplex 0 -snes_monitor_short -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_esteig_ksp_type cg -pc_gamg_esteig_ksp_max_it 10 -pc_gamg_process_eq_limit 400 -ksp_norm_type unpreconditioned \ 268 -pc_gamg_coarse_eq_limit 10 -snes_converged_reason -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl 269 270 test: 271 suffix: aijmkl_seq 272 nsize: 1 273 requires: mkl_sparse 274 TODO: broken (INDEFINITE PC) 275 args: -dm_plex_dim 3 -dm_plex_box_faces 4,4,4 -dm_refine 1 -petscpartitioner_type simple -potential_petscspace_degree 1 -dm_plex_simplex 0 -snes_monitor_short \ 276 -snes_type ksponly -dm_view -pc_type gamg -pc_gamg_sym_graph 0 -pc_gamg_threshold -1 -pc_gamg_square_graph 10 -pc_gamg_process_eq_limit 400 \ 277 -pc_gamg_reuse_interpolation -pc_gamg_coarse_eq_limit 10 -pc_gamg_esteig_ksp_type cg -ksp_type cg -ksp_norm_type unpreconditioned -snes_converged_reason \ 278 -ksp_converged_reason -snes_rtol 1.e-4 -dm_mat_type aijmkl -dm_vec_type standard 279 280 TEST*/ 281