xref: /petsc/src/ksp/pc/impls/amgx/amgx.cxx (revision 9d0fdff2e8b5ebdd0e57bf4dfe77e45fec5df614)
1 /*
2      This file implements an AmgX preconditioner in PETSc as part of PC.
3  */
4 
5 /*
6    Include files needed for the AmgX preconditioner:
7      pcimpl.h - private include file intended for use by all preconditioners
8 */
9 
10 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
11 #include <petscdevice_cuda.h>
12 #include <amgx_c.h>
13 #include <limits>
14 #include <vector>
15 #include <algorithm>
16 #include <map>
17 #include <numeric>
18 #include "cuda_runtime.h"
19 
20 enum class AmgXSmoother {
21   PCG,
22   PCGF,
23   PBiCGStab,
24   GMRES,
25   FGMRES,
26   JacobiL1,
27   BlockJacobi,
28   GS,
29   MulticolorGS,
30   MulticolorILU,
31   MulticolorDILU,
32   ChebyshevPoly,
33   NoSolver
34 };
35 enum class AmgXAMGMethod {
36   Classical,
37   Aggregation
38 };
39 enum class AmgXSelector {
40   Size2,
41   Size4,
42   Size8,
43   MultiPairwise,
44   PMIS,
45   HMIS
46 };
47 enum class AmgXCoarseSolver {
48   DenseLU,
49   NoSolver
50 };
51 enum class AmgXAMGCycle {
52   V,
53   W,
54   F,
55   CG,
56   CGF
57 };
58 
59 struct AmgXControlMap {
60   static const std::map<std::string, AmgXAMGMethod>    AMGMethods;
61   static const std::map<std::string, AmgXSmoother>     Smoothers;
62   static const std::map<std::string, AmgXSelector>     Selectors;
63   static const std::map<std::string, AmgXCoarseSolver> CoarseSolvers;
64   static const std::map<std::string, AmgXAMGCycle>     AMGCycles;
65 };
66 
67 const std::map<std::string, AmgXAMGMethod> AmgXControlMap::AMGMethods = {
68   {"CLASSICAL",   AmgXAMGMethod::Classical  },
69   {"AGGREGATION", AmgXAMGMethod::Aggregation}
70 };
71 
72 const std::map<std::string, AmgXSmoother> AmgXControlMap::Smoothers = {
73   {"PCG",             AmgXSmoother::PCG           },
74   {"PCGF",            AmgXSmoother::PCGF          },
75   {"PBICGSTAB",       AmgXSmoother::PBiCGStab     },
76   {"GMRES",           AmgXSmoother::GMRES         },
77   {"FGMRES",          AmgXSmoother::FGMRES        },
78   {"JACOBI_L1",       AmgXSmoother::JacobiL1      },
79   {"BLOCK_JACOBI",    AmgXSmoother::BlockJacobi   },
80   {"GS",              AmgXSmoother::GS            },
81   {"MULTICOLOR_GS",   AmgXSmoother::MulticolorGS  },
82   {"MULTICOLOR_ILU",  AmgXSmoother::MulticolorILU },
83   {"MULTICOLOR_DILU", AmgXSmoother::MulticolorDILU},
84   {"CHEBYSHEV_POLY",  AmgXSmoother::ChebyshevPoly },
85   {"NOSOLVER",        AmgXSmoother::NoSolver      }
86 };
87 
88 const std::map<std::string, AmgXSelector> AmgXControlMap::Selectors = {
89   {"SIZE_2",         AmgXSelector::Size2        },
90   {"SIZE_4",         AmgXSelector::Size4        },
91   {"SIZE_8",         AmgXSelector::Size8        },
92   {"MULTI_PAIRWISE", AmgXSelector::MultiPairwise},
93   {"PMIS",           AmgXSelector::PMIS         },
94   {"HMIS",           AmgXSelector::HMIS         }
95 };
96 
97 const std::map<std::string, AmgXCoarseSolver> AmgXControlMap::CoarseSolvers = {
98   {"DENSE_LU_SOLVER", AmgXCoarseSolver::DenseLU },
99   {"NOSOLVER",        AmgXCoarseSolver::NoSolver}
100 };
101 
102 const std::map<std::string, AmgXAMGCycle> AmgXControlMap::AMGCycles = {
103   {"V",   AmgXAMGCycle::V  },
104   {"W",   AmgXAMGCycle::W  },
105   {"F",   AmgXAMGCycle::F  },
106   {"CG",  AmgXAMGCycle::CG },
107   {"CGF", AmgXAMGCycle::CGF}
108 };
109 
110 /*
111    Private context (data structure) for the AMGX preconditioner.
112 */
113 struct PC_AMGX {
114   AMGX_solver_handle    solver;
115   AMGX_config_handle    cfg;
116   AMGX_resources_handle rsrc;
117   bool                  solve_state_init;
118   bool                  rsrc_init;
119   PetscBool             verbose;
120 
121   AMGX_matrix_handle A;
122   AMGX_vector_handle sol;
123   AMGX_vector_handle rhs;
124 
125   MPI_Comm    comm;
126   PetscMPIInt rank   = 0;
127   PetscMPIInt nranks = 0;
128   int         devID  = 0;
129 
130   void       *lib_handle = 0;
131   std::string cfg_contents;
132 
133   // Cached state for re-setup
134   PetscInt           nnz;
135   PetscInt           nLocalRows;
136   PetscInt           nGlobalRows;
137   PetscInt           bSize;
138   Mat                localA;
139   const PetscScalar *values;
140 
141   // AMG Control parameters
142   AmgXSmoother     smoother;
143   AmgXAMGMethod    amg_method;
144   AmgXSelector     selector;
145   AmgXCoarseSolver coarse_solver;
146   AmgXAMGCycle     amg_cycle;
147   PetscInt         presweeps;
148   PetscInt         postsweeps;
149   PetscInt         max_levels;
150   PetscInt         aggressive_levels;
151   PetscInt         dense_lu_num_rows;
152   PetscScalar      strength_threshold;
153   PetscBool        print_grid_stats;
154   PetscBool        exact_coarse_solve;
155 
156   // Smoother control parameters
157   PetscScalar jacobi_relaxation_factor;
158   PetscScalar gs_symmetric;
159 };
160 
161 static PetscInt s_count = 0;
162 
163 // Buffer of messages from AmgX
164 // Currently necessary hack before we adapt AmgX to print from single rank only
165 static std::string amgx_output{};
166 
167 // A print callback that allows AmgX to return status messages
168 static void print_callback(const char *msg, int length) {
169   amgx_output.append(msg);
170 }
171 
172 // Outputs messages from the AmgX message buffer and clears it
173 PetscErrorCode amgx_output_messages(PC_AMGX *amgx) {
174   PetscFunctionBegin;
175 
176   // If AmgX output is enabled and we have a message, output it
177   if (amgx->verbose && !amgx_output.empty()) {
178     // Only a single rank to output the AmgX messages
179     PetscCall(PetscPrintf(amgx->comm, "AMGX: %s", amgx_output.c_str()));
180 
181     // Note that all ranks clear their received output
182     amgx_output.clear();
183   }
184 
185   PetscFunctionReturn(0);
186 }
187 
188 // XXX Need to add call in AmgX API that gracefully destroys everything
189 // without abort etc.
190 #define PetscCallAmgX(rc) \
191   do { \
192     AMGX_RC err = (rc); \
193     char    msg[4096]; \
194     switch (err) { \
195     case AMGX_RC_OK: break; \
196     default: AMGX_get_error_string(err, msg, 4096); SETERRQ(amgx->comm, PETSC_ERR_LIB, "%s", msg); \
197     } \
198   } while (0)
199 
200 /*
201    PCSetUp_AMGX - Prepares for the use of the AmgX preconditioner
202                     by setting data structures and options.
203 
204    Input Parameter:
205 .  pc - the preconditioner context
206 
207    Application Interface Routine: PCSetUp()
208 
209    Note:
210    The interface routine PCSetUp() is not usually called directly by
211    the user, but instead is called by PCApply() if necessary.
212 */
213 static PetscErrorCode PCSetUp_AMGX(PC pc) {
214   PC_AMGX  *amgx = (PC_AMGX *)pc->data;
215   Mat       Pmat = pc->pmat;
216   PetscBool is_dev_ptrs;
217 
218   PetscFunctionBegin;
219   PetscCall(PetscObjectTypeCompareAny((PetscObject)Pmat, &is_dev_ptrs, MATAIJCUSPARSE, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
220 
221   // At the present time, an AmgX matrix is a sequential matrix
222   // Non-sequential/MPI matrices must be adapted to extract the local matrix
223   bool partial_setup_allowed = (pc->setupcalled && pc->flag != DIFFERENT_NONZERO_PATTERN);
224   if (amgx->nranks > 1) {
225     if (partial_setup_allowed) {
226       PetscCall(MatMPIAIJGetLocalMat(Pmat, MAT_REUSE_MATRIX, &amgx->localA));
227     } else {
228       PetscCall(MatMPIAIJGetLocalMat(Pmat, MAT_INITIAL_MATRIX, &amgx->localA));
229     }
230 
231     if (is_dev_ptrs) PetscCall(MatConvert(amgx->localA, MATSEQAIJCUSPARSE, MAT_INPLACE_MATRIX, &amgx->localA));
232   } else {
233     amgx->localA = Pmat;
234   }
235 
236   if (is_dev_ptrs) {
237     PetscCall(MatSeqAIJCUSPARSEGetArrayRead(amgx->localA, &amgx->values));
238   } else {
239     PetscCall(MatSeqAIJGetArrayRead(amgx->localA, &amgx->values));
240   }
241 
242   if (!partial_setup_allowed) {
243     // Initialise resources and matrices
244     if (!amgx->rsrc_init) {
245       // Read configuration file
246       PetscCallAmgX(AMGX_config_create(&amgx->cfg, amgx->cfg_contents.c_str()));
247       PetscCallAmgX(AMGX_resources_create(&amgx->rsrc, amgx->cfg, &amgx->comm, 1, &amgx->devID));
248       amgx->rsrc_init = true;
249     }
250 
251     PetscCheck(!amgx->solve_state_init, amgx->comm, PETSC_ERR_PLIB, "AmgX solve state initialisation already called.");
252     PetscCallAmgX(AMGX_matrix_create(&amgx->A, amgx->rsrc, AMGX_mode_dDDI));
253     PetscCallAmgX(AMGX_vector_create(&amgx->sol, amgx->rsrc, AMGX_mode_dDDI));
254     PetscCallAmgX(AMGX_vector_create(&amgx->rhs, amgx->rsrc, AMGX_mode_dDDI));
255     PetscCallAmgX(AMGX_solver_create(&amgx->solver, amgx->rsrc, AMGX_mode_dDDI, amgx->cfg));
256     amgx->solve_state_init = true;
257 
258     // Extract the CSR data
259     PetscBool       done;
260     const PetscInt *colIndices;
261     const PetscInt *rowOffsets;
262     PetscCall(MatGetRowIJ(amgx->localA, 0, PETSC_FALSE, PETSC_FALSE, &amgx->nLocalRows, &rowOffsets, &colIndices, &done));
263     PetscCheck(done, amgx->comm, PETSC_ERR_PLIB, "MatGetRowIJ was not successful");
264     PetscCheck(amgx->nLocalRows < std::numeric_limits<int>::max(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "AmgX restricted to int local rows but nLocalRows = %" PetscInt_FMT " > max<int>", amgx->nLocalRows);
265 
266     if (is_dev_ptrs) {
267       PetscCallCUDA(cudaMemcpy(&amgx->nnz, &rowOffsets[amgx->nLocalRows], sizeof(int), cudaMemcpyDefault));
268     } else {
269       amgx->nnz = rowOffsets[amgx->nLocalRows];
270     }
271 
272     PetscCheck(amgx->nnz < std::numeric_limits<int>::max(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Support for 64-bit integer nnz not yet implemented, nnz = %" PetscInt_FMT ".", amgx->nnz);
273 
274     // Allocate space for some partition offsets
275     std::vector<PetscInt> partitionOffsets(amgx->nranks + 1);
276 
277     // Fetch the number of local rows per rank
278     partitionOffsets[0] = 0; /* could use PetscLayoutGetRanges */
279     PetscCallMPI(MPI_Allgather(&amgx->nLocalRows, 1, MPIU_INT, partitionOffsets.data() + 1, 1, MPIU_INT, amgx->comm));
280     std::partial_sum(partitionOffsets.begin(), partitionOffsets.end(), partitionOffsets.begin());
281 
282     // Fetch the number of global rows
283     amgx->nGlobalRows = partitionOffsets[amgx->nranks];
284 
285     PetscCall(MatGetBlockSize(Pmat, &amgx->bSize));
286 
287     // XXX Currently constrained to 32-bit indices, to be changed in the future
288     // Create the distribution and upload the matrix data
289     AMGX_distribution_handle dist;
290     PetscCallAmgX(AMGX_distribution_create(&dist, amgx->cfg));
291     PetscCallAmgX(AMGX_distribution_set_32bit_colindices(dist, true));
292     PetscCallAmgX(AMGX_distribution_set_partition_data(dist, AMGX_DIST_PARTITION_OFFSETS, partitionOffsets.data()));
293     PetscCallAmgX(AMGX_matrix_upload_distributed(amgx->A, amgx->nGlobalRows, (int)amgx->nLocalRows, (int)amgx->nnz, amgx->bSize, amgx->bSize, rowOffsets, colIndices, amgx->values, NULL, dist));
294     PetscCallAmgX(AMGX_solver_setup(amgx->solver, amgx->A));
295     PetscCallAmgX(AMGX_vector_bind(amgx->sol, amgx->A));
296     PetscCallAmgX(AMGX_vector_bind(amgx->rhs, amgx->A));
297 
298     PetscInt nlr = 0;
299     PetscCall(MatRestoreRowIJ(amgx->localA, 0, PETSC_FALSE, PETSC_FALSE, &nlr, &rowOffsets, &colIndices, &done));
300   } else {
301     // The fast path for if the sparsity pattern persists
302     PetscCallAmgX(AMGX_matrix_replace_coefficients(amgx->A, amgx->nLocalRows, amgx->nnz, amgx->values, NULL));
303     PetscCallAmgX(AMGX_solver_resetup(amgx->solver, amgx->A));
304   }
305 
306   if (is_dev_ptrs) {
307     PetscCall(MatSeqAIJCUSPARSERestoreArrayRead(amgx->localA, &amgx->values));
308   } else {
309     PetscCall(MatSeqAIJRestoreArrayRead(amgx->localA, &amgx->values));
310   }
311   amgx_output_messages(amgx);
312   PetscFunctionReturn(0);
313 }
314 
315 /*
316    PCApply_AMGX - Applies the AmgX preconditioner to a vector.
317 
318    Input Parameters:
319 .  pc - the preconditioner context
320 .  b - rhs vector
321 
322    Output Parameter:
323 .  x - solution vector
324 
325    Application Interface Routine: PCApply()
326  */
327 static PetscErrorCode PCApply_AMGX(PC pc, Vec b, Vec x) {
328   PC_AMGX           *amgx = (PC_AMGX *)pc->data;
329   PetscScalar       *x_;
330   const PetscScalar *b_;
331   PetscBool          is_dev_ptrs;
332 
333   PetscFunctionBegin;
334   PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &is_dev_ptrs, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
335 
336   if (is_dev_ptrs) {
337     PetscCall(VecCUDAGetArrayWrite(x, &x_));
338     PetscCall(VecCUDAGetArrayRead(b, &b_));
339   } else {
340     PetscCall(VecGetArrayWrite(x, &x_));
341     PetscCall(VecGetArrayRead(b, &b_));
342   }
343 
344   PetscCallAmgX(AMGX_vector_upload(amgx->sol, amgx->nLocalRows, 1, x_));
345   PetscCallAmgX(AMGX_vector_upload(amgx->rhs, amgx->nLocalRows, 1, b_));
346   PetscCallAmgX(AMGX_solver_solve_with_0_initial_guess(amgx->solver, amgx->rhs, amgx->sol));
347 
348   AMGX_SOLVE_STATUS status;
349   PetscCallAmgX(AMGX_solver_get_status(amgx->solver, &status));
350   PetscCall(PCSetErrorIfFailure(pc, static_cast<PetscBool>(status == AMGX_SOLVE_FAILED)));
351   PetscCheck(status != AMGX_SOLVE_FAILED, amgx->comm, PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! The error code is %d.", status);
352   PetscCallAmgX(AMGX_vector_download(amgx->sol, x_));
353 
354   if (is_dev_ptrs) {
355     PetscCall(VecCUDARestoreArrayWrite(x, &x_));
356     PetscCall(VecCUDARestoreArrayRead(b, &b_));
357   } else {
358     PetscCall(VecRestoreArrayWrite(x, &x_));
359     PetscCall(VecRestoreArrayRead(b, &b_));
360   }
361   amgx_output_messages(amgx);
362   PetscFunctionReturn(0);
363 }
364 
365 static PetscErrorCode PCReset_AMGX(PC pc) {
366   PC_AMGX *amgx = (PC_AMGX *)pc->data;
367 
368   PetscFunctionBegin;
369   if (amgx->solve_state_init) {
370     PetscCallAmgX(AMGX_solver_destroy(amgx->solver));
371     PetscCallAmgX(AMGX_matrix_destroy(amgx->A));
372     PetscCallAmgX(AMGX_vector_destroy(amgx->sol));
373     PetscCallAmgX(AMGX_vector_destroy(amgx->rhs));
374     if (amgx->nranks > 1) PetscCall(MatDestroy(&amgx->localA));
375     amgx_output_messages(amgx);
376     amgx->solve_state_init = false;
377   }
378   PetscFunctionReturn(0);
379 }
380 
381 /*
382    PCDestroy_AMGX - Destroys the private context for the AmgX preconditioner
383    that was created with PCCreate_AMGX().
384 
385    Input Parameter:
386 .  pc - the preconditioner context
387 
388    Application Interface Routine: PCDestroy()
389 */
390 static PetscErrorCode PCDestroy_AMGX(PC pc) {
391   PC_AMGX *amgx = (PC_AMGX *)pc->data;
392 
393   PetscFunctionBegin;
394   /* decrease the number of instances, only the last instance need to destroy resource and finalizing AmgX */
395   if (s_count == 1) {
396     /* can put this in a PCAMGXInitializePackage method */
397     PetscCheck(amgx->rsrc != nullptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "s_rsrc == NULL");
398     PetscCallAmgX(AMGX_resources_destroy(amgx->rsrc));
399     /* destroy config (need to use AMGX_SAFE_CALL after this point) */
400     PetscCallAmgX(AMGX_config_destroy(amgx->cfg));
401     PetscCallAmgX(AMGX_finalize_plugins());
402     PetscCallAmgX(AMGX_finalize());
403     PetscCallMPI(MPI_Comm_free(&amgx->comm));
404   } else {
405     PetscCallAmgX(AMGX_config_destroy(amgx->cfg));
406   }
407   s_count -= 1;
408   PetscCall(PetscFree(amgx));
409   PetscFunctionReturn(0);
410 }
411 
412 template <class T>
413 std::string map_reverse_lookup(const std::map<std::string, T> &map, const T &key) {
414   for (auto const &m : map) {
415     if (m.second == key) return m.first;
416   }
417   return "";
418 }
419 
420 static PetscErrorCode PCSetFromOptions_AMGX(PC pc, PetscOptionItems *PetscOptionsObject) {
421   PC_AMGX      *amgx          = (PC_AMGX *)pc->data;
422   constexpr int MAX_PARAM_LEN = 128;
423   char          option[MAX_PARAM_LEN];
424 
425   PetscFunctionBegin;
426   PetscOptionsHeadBegin(PetscOptionsObject, "AmgX options");
427   amgx->cfg_contents = "config_version=2,";
428   amgx->cfg_contents += "determinism_flag=1,";
429 
430   // Set exact coarse solve
431   PetscCall(PetscOptionsBool("-pc_amgx_exact_coarse_solve", "AmgX AMG Exact Coarse Solve", "", amgx->exact_coarse_solve, &amgx->exact_coarse_solve, NULL));
432   if (amgx->exact_coarse_solve) amgx->cfg_contents += "exact_coarse_solve=1,";
433 
434   amgx->cfg_contents += "solver(amg)=AMG,";
435 
436   // Set method
437   std::string def_amg_method = map_reverse_lookup(AmgXControlMap::AMGMethods, amgx->amg_method);
438   PetscCall(PetscStrcpy(option, def_amg_method.c_str()));
439   PetscCall(PetscOptionsString("-pc_amgx_amg_method", "AmgX AMG Method", "", option, option, MAX_PARAM_LEN, NULL));
440   PetscCheck(AmgXControlMap::AMGMethods.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "AMG Method %s not registered for AmgX.", option);
441   amgx->amg_method = AmgXControlMap::AMGMethods.at(option);
442   amgx->cfg_contents += "amg:algorithm=" + std::string(option) + ",";
443 
444   // Set cycle
445   std::string def_amg_cycle = map_reverse_lookup(AmgXControlMap::AMGCycles, amgx->amg_cycle);
446   PetscCall(PetscStrcpy(option, def_amg_cycle.c_str()));
447   PetscCall(PetscOptionsString("-pc_amgx_amg_cycle", "AmgX AMG Cycle", "", option, option, MAX_PARAM_LEN, NULL));
448   PetscCheck(AmgXControlMap::AMGCycles.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "AMG Cycle %s not registered for AmgX.", option);
449   amgx->amg_cycle = AmgXControlMap::AMGCycles.at(option);
450   amgx->cfg_contents += "amg:cycle=" + std::string(option) + ",";
451 
452   // Set smoother
453   std::string def_smoother = map_reverse_lookup(AmgXControlMap::Smoothers, amgx->smoother);
454   PetscCall(PetscStrcpy(option, def_smoother.c_str()));
455   PetscCall(PetscOptionsString("-pc_amgx_smoother", "AmgX Smoother", "", option, option, MAX_PARAM_LEN, NULL));
456   PetscCheck(AmgXControlMap::Smoothers.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Smoother %s not registered for AmgX.", option);
457   amgx->smoother = AmgXControlMap::Smoothers.at(option);
458   amgx->cfg_contents += "amg:smoother(smooth)=" + std::string(option) + ",";
459 
460   if (amgx->smoother == AmgXSmoother::JacobiL1 || amgx->smoother == AmgXSmoother::BlockJacobi) {
461     PetscCall(PetscOptionsScalar("-pc_amgx_jacobi_relaxation_factor", "AmgX AMG Jacobi Relaxation Factor", "", amgx->jacobi_relaxation_factor, &amgx->jacobi_relaxation_factor, NULL));
462     amgx->cfg_contents += "smooth:relaxation_factor=" + std::to_string(amgx->jacobi_relaxation_factor) + ",";
463   } else if (amgx->smoother == AmgXSmoother::GS || amgx->smoother == AmgXSmoother::MulticolorGS) {
464     PetscCall(PetscOptionsScalar("-pc_amgx_gs_symmetric", "AmgX AMG Gauss Seidel Symmetric", "", amgx->gs_symmetric, &amgx->gs_symmetric, NULL));
465     amgx->cfg_contents += "smooth:symmetric_GS=" + std::to_string(amgx->gs_symmetric) + ",";
466   }
467 
468   // Set selector
469   std::string def_selector = map_reverse_lookup(AmgXControlMap::Selectors, amgx->selector);
470   PetscCall(PetscStrcpy(option, def_selector.c_str()));
471   PetscCall(PetscOptionsString("-pc_amgx_selector", "AmgX Selector", "", option, option, MAX_PARAM_LEN, NULL));
472   PetscCheck(AmgXControlMap::Selectors.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Selector %s not registered for AmgX.", option);
473 
474   // Double check that the user has selected an appropriate selector for the AMG method
475   if (amgx->amg_method == AmgXAMGMethod::Classical) {
476     PetscCheck(amgx->selector == AmgXSelector::PMIS || amgx->selector == AmgXSelector::HMIS, amgx->comm, PETSC_ERR_PLIB, "Chosen selector is not used for AmgX Classical AMG: selector=%s", option);
477     amgx->cfg_contents += "amg:interpolator=D2,";
478   } else if (amgx->amg_method == AmgXAMGMethod::Aggregation) {
479     PetscCheck(amgx->selector == AmgXSelector::Size2 || amgx->selector == AmgXSelector::Size4 || amgx->selector == AmgXSelector::Size8 || amgx->selector == AmgXSelector::MultiPairwise, amgx->comm, PETSC_ERR_PLIB, "Chosen selector is not used for AmgX Aggregation AMG");
480   }
481   amgx->selector = AmgXControlMap::Selectors.at(option);
482   amgx->cfg_contents += "amg:selector=" + std::string(option) + ",";
483 
484   // Set presweeps
485   PetscCall(PetscOptionsInt("-pc_amgx_presweeps", "AmgX AMG Presweep Count", "", amgx->presweeps, &amgx->presweeps, NULL));
486   amgx->cfg_contents += "amg:presweeps=" + std::to_string(amgx->presweeps) + ",";
487 
488   // Set postsweeps
489   PetscCall(PetscOptionsInt("-pc_amgx_postsweeps", "AmgX AMG Postsweep Count", "", amgx->postsweeps, &amgx->postsweeps, NULL));
490   amgx->cfg_contents += "amg:postsweeps=" + std::to_string(amgx->postsweeps) + ",";
491 
492   // Set max levels
493   PetscCall(PetscOptionsInt("-pc_amgx_max_levels", "AmgX AMG Max Level Count", "", amgx->max_levels, &amgx->max_levels, NULL));
494   amgx->cfg_contents += "amg:max_levels=100,";
495 
496   // Set dense LU num rows
497   PetscCall(PetscOptionsInt("-pc_amgx_dense_lu_num_rows", "AmgX Dense LU Number of Rows", "", amgx->dense_lu_num_rows, &amgx->dense_lu_num_rows, NULL));
498   amgx->cfg_contents += "amg:dense_lu_num_rows=" + std::to_string(amgx->dense_lu_num_rows) + ",";
499 
500   // Set strength threshold
501   PetscCall(PetscOptionsScalar("-pc_amgx_strength_threshold", "AmgX AMG Strength Threshold", "", amgx->strength_threshold, &amgx->strength_threshold, NULL));
502   amgx->cfg_contents += "amg:strength_threshold=" + std::to_string(amgx->strength_threshold) + ",";
503 
504   // Set aggressive_levels
505   PetscCall(PetscOptionsInt("-pc_amgx_aggressive_levels", "AmgX AMG Presweep Count", "", amgx->aggressive_levels, &amgx->aggressive_levels, NULL));
506   if (amgx->aggressive_levels > 0) amgx->cfg_contents += "amg:aggressive_levels=" + std::to_string(amgx->aggressive_levels) + ",";
507 
508   // Set coarse solver
509   std::string def_coarse_solver = map_reverse_lookup(AmgXControlMap::CoarseSolvers, amgx->coarse_solver);
510   PetscCall(PetscStrcpy(option, def_coarse_solver.c_str()));
511   PetscCall(PetscOptionsString("-pc_amgx_coarse_solver", "AmgX CoarseSolver", "", option, option, MAX_PARAM_LEN, NULL));
512   PetscCheck(AmgXControlMap::CoarseSolvers.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "CoarseSolver %s not registered for AmgX.", option);
513   amgx->coarse_solver = AmgXControlMap::CoarseSolvers.at(option);
514   amgx->cfg_contents += "amg:coarse_solver=" + std::string(option) + ",";
515 
516   // Set max iterations
517   amgx->cfg_contents += "amg:max_iters=1,";
518 
519   // Set output control parameters
520   PetscCall(PetscOptionsBool("-pc_amgx_print_grid_stats", "AmgX Print Grid Stats", "", amgx->print_grid_stats, &amgx->print_grid_stats, NULL));
521 
522   if (amgx->print_grid_stats) amgx->cfg_contents += "amg:print_grid_stats=1,";
523   amgx->cfg_contents += "amg:monitor_residual=0";
524 
525   // Set whether AmgX output will be seen
526   PetscCall(PetscOptionsBool("-pc_amgx_verbose", "Enable output from AmgX", "", amgx->verbose, &amgx->verbose, NULL));
527   PetscOptionsHeadEnd();
528   PetscFunctionReturn(0);
529 }
530 
531 static PetscErrorCode PCView_AMGX(PC pc, PetscViewer viewer) {
532   PC_AMGX  *amgx = (PC_AMGX *)pc->data;
533   PetscBool iascii;
534 
535   PetscFunctionBegin;
536   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
537   if (iascii) {
538     std::string output_cfg(amgx->cfg_contents);
539     std::replace(output_cfg.begin(), output_cfg.end(), ',', '\n');
540     PetscCall(PetscViewerASCIIPrintf(viewer, "\n%s\n", output_cfg.c_str()));
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 /*MC
546      PCAMGX - Interface to NVIDIA's AmgX algebraic multigrid
547 
548    Options Database Keys:
549 +    -pc_amgx_amg_method <CLASSICAL,AGGREGATION> - set the AMG algorithm to use
550 .    -pc_amgx_amg_cycle <V,W,F,CG> - set the AMG cycle type
551 .    -pc_amgx_smoother <PCG,PCGF,PBICGSTAB,GMRES,FGMRES,JACOBI_L1,BLOCK_JACOBI,GS,MULTICOLOR_GS,MULTICOLOR_ILU,MULTICOLOR_DILU,CHEBYSHEV_POLY,NOSOLVER> - set the AMG pre/post smoother
552 .    -pc_amgx_jacobi_relaxation_factor - set the relaxation factor for Jacobi smoothing
553 .    -pc_amgx_gs_symmetric - enforce symmetric Gauss-Seidel smoothing (only applies if GS smoothing is selected)
554 .    -pc_amgx_selector <SIZE_2,SIZE_4,SIZE_8,MULTI_PAIRWISE,PMIS,HMIS> - set the AMG coarse selector
555 .    -pc_amgx_presweeps - set the number of AMG pre-sweeps
556 .    -pc_amgx_postsweeps - set the number of AMG post-sweeps
557 .    -pc_amgx_max_levels - set the maximum number of levels in the AMG level hierarchy
558 .    -pc_amgx_strength_threshold - set the strength threshold for the AMG coarsening
559 .    -pc_amgx_aggressive_levels - set the number of levels (from the finest) that should apply aggressive coarsening
560 .    -pc_amgx_coarse_solver <DENSE_LU_SOLVER,NOSOLVER> - set the coarse solve
561 .    -pc_amgx_print_grid_stats - output the AMG grid hierarchy to stdout
562 -    -pc_amgx_verbose - enable AmgX output
563 
564    Level: intermediate
565 
566    Note:
567      Implementation will accept host or device pointers, but good performance will require that the `KSP` is also GPU accelerated so that data is not frequently transferred between host and device.
568 
569 .seealso: `PCGAMG`, `PCHYPRE`, `PCMG`, `PCAmgXGetResources()`, `PCCreate()`, `PCSetType()`, `PCType` (for list of available types), `PC`
570 M*/
571 
572 PETSC_EXTERN PetscErrorCode PCCreate_AMGX(PC pc) {
573   PC_AMGX *amgx;
574 
575   PetscFunctionBegin;
576   PetscCall(PetscNew(&amgx));
577   pc->ops->apply          = PCApply_AMGX;
578   pc->ops->setfromoptions = PCSetFromOptions_AMGX;
579   pc->ops->setup          = PCSetUp_AMGX;
580   pc->ops->view           = PCView_AMGX;
581   pc->ops->destroy        = PCDestroy_AMGX;
582   pc->ops->reset          = PCReset_AMGX;
583   pc->data                = (void *)amgx;
584 
585   // Set the defaults
586   amgx->selector                 = AmgXSelector::PMIS;
587   amgx->smoother                 = AmgXSmoother::BlockJacobi;
588   amgx->amg_method               = AmgXAMGMethod::Classical;
589   amgx->coarse_solver            = AmgXCoarseSolver::DenseLU;
590   amgx->amg_cycle                = AmgXAMGCycle::V;
591   amgx->exact_coarse_solve       = PETSC_TRUE;
592   amgx->presweeps                = 1;
593   amgx->postsweeps               = 1;
594   amgx->max_levels               = 100;
595   amgx->strength_threshold       = 0.5;
596   amgx->aggressive_levels        = 0;
597   amgx->dense_lu_num_rows        = 1;
598   amgx->jacobi_relaxation_factor = 0.9;
599   amgx->gs_symmetric             = PETSC_FALSE;
600   amgx->print_grid_stats         = PETSC_FALSE;
601   amgx->verbose                  = PETSC_FALSE;
602   amgx->rsrc_init                = false;
603   amgx->solve_state_init         = false;
604 
605   s_count++;
606 
607   PetscCallCUDA(cudaGetDevice(&amgx->devID));
608   if (s_count == 1) {
609     PetscCallAmgX(AMGX_initialize());
610     PetscCallAmgX(AMGX_initialize_plugins());
611     PetscCallAmgX(AMGX_register_print_callback(&print_callback));
612     PetscCallAmgX(AMGX_install_signal_handler());
613   }
614   /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
615   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &amgx->comm));
616   PetscCallMPI(MPI_Comm_size(amgx->comm, &amgx->nranks));
617   PetscCallMPI(MPI_Comm_rank(amgx->comm, &amgx->rank));
618 
619   amgx_output_messages(amgx);
620   PetscFunctionReturn(0);
621 }
622 
623 /*@C
624    PCAmgXGetResources - get AMGx's internal resource object
625 
626     Not Collective
627 
628    Input Parameter:
629 .  pc - the PC
630 
631    Output Parameter:
632 .  rsrc_out - pointer to the AMGx resource object
633 
634    Level: advanced
635 
636 .seealso: `PCAMGX`, `PC`, `PCGAMG`
637 @*/
638 PETSC_EXTERN PetscErrorCode PCAmgXGetResources(PC pc, void *rsrc_out) {
639   PC_AMGX *amgx = (PC_AMGX *)pc->data;
640 
641   PetscFunctionBegin;
642   if (!amgx->rsrc_init) {
643     // Read configuration file
644     PetscCallAmgX(AMGX_config_create(&amgx->cfg, amgx->cfg_contents.c_str()));
645     PetscCallAmgX(AMGX_resources_create(&amgx->rsrc, amgx->cfg, &amgx->comm, 1, &amgx->devID));
646     amgx->rsrc_init = true;
647   }
648   *static_cast<AMGX_resources_handle *>(rsrc_out) = amgx->rsrc;
649   PetscFunctionReturn(0);
650 }
651