xref: /petsc/src/ksp/pc/impls/amgx/amgx.cxx (revision d2522c19e8fa9bca20aaca277941d9a63e71db6a)
1 /*
2      This file implements an AmgX preconditioner in PETSc as part of PC.
3  */
4 
5 /*
6    Include files needed for the AmgX preconditioner:
7      pcimpl.h - private include file intended for use by all preconditioners
8 */
9 
10 #include <petsc/private/pcimpl.h> /*I "petscpc.h" I*/
11 #include <petscdevice.h>
12 #include <amgx_c.h>
13 #include <limits>
14 #include <vector>
15 #include <algorithm>
16 #include <map>
17 #include <numeric>
18 #include "cuda_runtime.h"
19 
20 enum class AmgXSmoother {
21   PCG,
22   PCGF,
23   PBiCGStab,
24   GMRES,
25   FGMRES,
26   JacobiL1,
27   BlockJacobi,
28   GS,
29   MulticolorGS,
30   MulticolorILU,
31   MulticolorDILU,
32   ChebyshevPoly,
33   NoSolver
34 };
35 enum class AmgXAMGMethod {
36   Classical,
37   Aggregation
38 };
39 enum class AmgXSelector {
40   Size2,
41   Size4,
42   Size8,
43   MultiPairwise,
44   PMIS,
45   HMIS
46 };
47 enum class AmgXCoarseSolver {
48   DenseLU,
49   NoSolver
50 };
51 enum class AmgXAMGCycle {
52   V,
53   W,
54   F,
55   CG,
56   CGF
57 };
58 
59 struct AmgXControlMap {
60   static const std::map<std::string, AmgXAMGMethod>    AMGMethods;
61   static const std::map<std::string, AmgXSmoother>     Smoothers;
62   static const std::map<std::string, AmgXSelector>     Selectors;
63   static const std::map<std::string, AmgXCoarseSolver> CoarseSolvers;
64   static const std::map<std::string, AmgXAMGCycle>     AMGCycles;
65 };
66 
67 const std::map<std::string, AmgXAMGMethod> AmgXControlMap::AMGMethods = {
68   {"CLASSICAL",   AmgXAMGMethod::Classical  },
69   {"AGGREGATION", AmgXAMGMethod::Aggregation}
70 };
71 
72 const std::map<std::string, AmgXSmoother> AmgXControlMap::Smoothers = {
73   {"PCG",             AmgXSmoother::PCG           },
74   {"PCGF",            AmgXSmoother::PCGF          },
75   {"PBICGSTAB",       AmgXSmoother::PBiCGStab     },
76   {"GMRES",           AmgXSmoother::GMRES         },
77   {"FGMRES",          AmgXSmoother::FGMRES        },
78   {"JACOBI_L1",       AmgXSmoother::JacobiL1      },
79   {"BLOCK_JACOBI",    AmgXSmoother::BlockJacobi   },
80   {"GS",              AmgXSmoother::GS            },
81   {"MULTICOLOR_GS",   AmgXSmoother::MulticolorGS  },
82   {"MULTICOLOR_ILU",  AmgXSmoother::MulticolorILU },
83   {"MULTICOLOR_DILU", AmgXSmoother::MulticolorDILU},
84   {"CHEBYSHEV_POLY",  AmgXSmoother::ChebyshevPoly },
85   {"NOSOLVER",        AmgXSmoother::NoSolver      }
86 };
87 
88 const std::map<std::string, AmgXSelector> AmgXControlMap::Selectors = {
89   {"SIZE_2",         AmgXSelector::Size2        },
90   {"SIZE_4",         AmgXSelector::Size4        },
91   {"SIZE_8",         AmgXSelector::Size8        },
92   {"MULTI_PAIRWISE", AmgXSelector::MultiPairwise},
93   {"PMIS",           AmgXSelector::PMIS         },
94   {"HMIS",           AmgXSelector::HMIS         }
95 };
96 
97 const std::map<std::string, AmgXCoarseSolver> AmgXControlMap::CoarseSolvers = {
98   {"DENSE_LU_SOLVER", AmgXCoarseSolver::DenseLU },
99   {"NOSOLVER",        AmgXCoarseSolver::NoSolver}
100 };
101 
102 const std::map<std::string, AmgXAMGCycle> AmgXControlMap::AMGCycles = {
103   {"V",   AmgXAMGCycle::V  },
104   {"W",   AmgXAMGCycle::W  },
105   {"F",   AmgXAMGCycle::F  },
106   {"CG",  AmgXAMGCycle::CG },
107   {"CGF", AmgXAMGCycle::CGF}
108 };
109 
110 /*
111    Private context (data structure) for the AMGX preconditioner.
112 */
113 struct PC_AMGX {
114   AMGX_solver_handle    solver;
115   AMGX_config_handle    cfg;
116   AMGX_resources_handle rsrc;
117   bool                  solve_state_init;
118   bool                  rsrc_init;
119   PetscBool             verbose;
120 
121   AMGX_matrix_handle A;
122   AMGX_vector_handle sol;
123   AMGX_vector_handle rhs;
124 
125   MPI_Comm    comm;
126   PetscMPIInt rank   = 0;
127   PetscMPIInt nranks = 0;
128   int         devID  = 0;
129 
130   void       *lib_handle = 0;
131   std::string cfg_contents;
132 
133   // Cached state for re-setup
134   PetscInt           nnz;
135   PetscInt           nLocalRows;
136   PetscInt           nGlobalRows;
137   PetscInt           bSize;
138   Mat                localA;
139   const PetscScalar *values;
140 
141   // AMG Control parameters
142   AmgXSmoother     smoother;
143   AmgXAMGMethod    amg_method;
144   AmgXSelector     selector;
145   AmgXCoarseSolver coarse_solver;
146   AmgXAMGCycle     amg_cycle;
147   PetscInt         presweeps;
148   PetscInt         postsweeps;
149   PetscInt         max_levels;
150   PetscInt         aggressive_levels;
151   PetscInt         dense_lu_num_rows;
152   PetscScalar      strength_threshold;
153   PetscBool        print_grid_stats;
154   PetscBool        exact_coarse_solve;
155 
156   // Smoother control parameters
157   PetscScalar jacobi_relaxation_factor;
158   PetscScalar gs_symmetric;
159 };
160 
161 static PetscInt s_count = 0;
162 
163 // Buffer of messages from AmgX
164 // Currently necessary hack before we adapt AmgX to print from single rank only
165 static std::string amgx_output{};
166 
167 // A print callback that allows AmgX to return status messages
168 static void print_callback(const char *msg, int length) {
169   amgx_output.append(msg);
170 }
171 
172 // Outputs messages from the AmgX message buffer and clears it
173 PetscErrorCode amgx_output_messages(PC_AMGX *amgx) {
174   PetscFunctionBegin;
175 
176   // If AmgX output is enabled and we have a message, output it
177   if (amgx->verbose && !amgx_output.empty()) {
178     // Only a single rank to output the AmgX messages
179     PetscCall(PetscPrintf(amgx->comm, "AMGX: %s", amgx_output.c_str()));
180 
181     // Note that all ranks clear their received output
182     amgx_output.clear();
183   }
184 
185   PetscFunctionReturn(0);
186 }
187 
188 // XXX Need to add call in AmgX API that gracefully destroys everything
189 // without abort etc.
190 #define PetscCallAmgX(rc) \
191   do { \
192     AMGX_RC err = (rc); \
193     char    msg[4096]; \
194     switch (err) { \
195     case AMGX_RC_OK: break; \
196     default: AMGX_get_error_string(err, msg, 4096); SETERRQ(amgx->comm, PETSC_ERR_LIB, "%s", msg); \
197     } \
198   } while (0)
199 
200 /*
201    PCSetUp_AMGX - Prepares for the use of the AmgX preconditioner
202                     by setting data structures and options.
203 
204    Input Parameter:
205 .  pc - the preconditioner context
206 
207    Application Interface Routine: PCSetUp()
208 
209    Notes:
210    The interface routine PCSetUp() is not usually called directly by
211    the user, but instead is called by PCApply() if necessary.
212 */
213 static PetscErrorCode PCSetUp_AMGX(PC pc) {
214   PC_AMGX  *amgx = (PC_AMGX *)pc->data;
215   Mat       Pmat = pc->pmat;
216   PetscBool is_dev_ptrs;
217 
218   PetscFunctionBegin;
219   PetscCall(PetscObjectTypeCompareAny((PetscObject)Pmat, &is_dev_ptrs, MATAIJCUSPARSE, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
220 
221   // At the present time, an AmgX matrix is a sequential matrix
222   // Non-sequential/MPI matrices must be adapted to extract the local matrix
223   bool partial_setup_allowed = (pc->setupcalled && pc->flag != DIFFERENT_NONZERO_PATTERN);
224   if (amgx->nranks > 1) {
225     if (partial_setup_allowed) {
226       PetscCall(MatMPIAIJGetLocalMat(Pmat, MAT_REUSE_MATRIX, &amgx->localA));
227     } else {
228       PetscCall(MatMPIAIJGetLocalMat(Pmat, MAT_INITIAL_MATRIX, &amgx->localA));
229     }
230 
231     if (is_dev_ptrs) { PetscCall(MatConvert(amgx->localA, MATSEQAIJCUSPARSE, MAT_INPLACE_MATRIX, &amgx->localA)); }
232   } else {
233     amgx->localA = Pmat;
234   }
235 
236   if (is_dev_ptrs) {
237     PetscCall(MatSeqAIJCUSPARSEGetArrayRead(amgx->localA, &amgx->values));
238   } else {
239     PetscCall(MatSeqAIJGetArrayRead(amgx->localA, &amgx->values));
240   }
241 
242   if (!partial_setup_allowed) {
243     // Initialise resources and matrices
244     if (!amgx->rsrc_init) {
245       // Read configuration file
246       PetscCallAmgX(AMGX_config_create(&amgx->cfg, amgx->cfg_contents.c_str()));
247       PetscCallAmgX(AMGX_resources_create(&amgx->rsrc, amgx->cfg, &amgx->comm, 1, &amgx->devID));
248       amgx->rsrc_init = true;
249     }
250 
251     PetscCheck(!amgx->solve_state_init, amgx->comm, PETSC_ERR_PLIB, "AmgX solve state initialisation already called.");
252     PetscCallAmgX(AMGX_matrix_create(&amgx->A, amgx->rsrc, AMGX_mode_dDDI));
253     PetscCallAmgX(AMGX_vector_create(&amgx->sol, amgx->rsrc, AMGX_mode_dDDI));
254     PetscCallAmgX(AMGX_vector_create(&amgx->rhs, amgx->rsrc, AMGX_mode_dDDI));
255     PetscCallAmgX(AMGX_solver_create(&amgx->solver, amgx->rsrc, AMGX_mode_dDDI, amgx->cfg));
256     amgx->solve_state_init = true;
257 
258     // Extract the CSR data
259     PetscBool       done;
260     const PetscInt *colIndices;
261     const PetscInt *rowOffsets;
262     PetscCall(MatGetRowIJ(amgx->localA, 0, PETSC_FALSE, PETSC_FALSE, &amgx->nLocalRows, &rowOffsets, &colIndices, &done));
263     PetscCheck(done, amgx->comm, PETSC_ERR_PLIB, "MatGetRowIJ was not successful");
264     PetscCheck(amgx->nLocalRows < std::numeric_limits<int>::max(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "AmgX restricted to int local rows but nLocalRows = %" PetscInt_FMT " > max<int>", amgx->nLocalRows);
265 
266     if (is_dev_ptrs) {
267       PetscCallCUDA(cudaMemcpy(&amgx->nnz, &rowOffsets[amgx->nLocalRows], sizeof(int), cudaMemcpyDefault));
268     } else {
269       amgx->nnz = rowOffsets[amgx->nLocalRows];
270     }
271 
272     PetscCheck(amgx->nnz < std::numeric_limits<int>::max(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Support for 64-bit integer nnz not yet implemented, nnz = %" PetscInt_FMT ".", amgx->nnz);
273 
274     // Allocate space for some partition offsets
275     std::vector<PetscInt> partitionOffsets(amgx->nranks + 1);
276 
277     // Fetch the number of local rows per rank
278     partitionOffsets[0] = 0; /* could use PetscLayoutGetRanges */
279     PetscCallMPI(MPI_Allgather(&amgx->nLocalRows, 1, MPIU_INT, partitionOffsets.data() + 1, 1, MPIU_INT, amgx->comm));
280     std::partial_sum(partitionOffsets.begin(), partitionOffsets.end(), partitionOffsets.begin());
281 
282     // Fetch the number of global rows
283     amgx->nGlobalRows = partitionOffsets[amgx->nranks];
284 
285     PetscCall(MatGetBlockSize(Pmat, &amgx->bSize));
286 
287     // XXX Currently constrained to 32-bit indices, to be changed in the future
288     // Create the distribution and upload the matrix data
289     AMGX_distribution_handle dist;
290     PetscCallAmgX(AMGX_distribution_create(&dist, amgx->cfg));
291     PetscCallAmgX(AMGX_distribution_set_32bit_colindices(dist, true));
292     PetscCallAmgX(AMGX_distribution_set_partition_data(dist, AMGX_DIST_PARTITION_OFFSETS, partitionOffsets.data()));
293     PetscCallAmgX(AMGX_matrix_upload_distributed(amgx->A, amgx->nGlobalRows, (int)amgx->nLocalRows, (int)amgx->nnz, amgx->bSize, amgx->bSize, rowOffsets, colIndices, amgx->values, NULL, dist));
294     PetscCallAmgX(AMGX_solver_setup(amgx->solver, amgx->A));
295     PetscCallAmgX(AMGX_vector_bind(amgx->sol, amgx->A));
296     PetscCallAmgX(AMGX_vector_bind(amgx->rhs, amgx->A));
297 
298     PetscInt nlr = 0;
299     PetscCall(MatRestoreRowIJ(amgx->localA, 0, PETSC_FALSE, PETSC_FALSE, &nlr, &rowOffsets, &colIndices, &done));
300   } else {
301     // The fast path for if the sparsity pattern persists
302     PetscCallAmgX(AMGX_matrix_replace_coefficients(amgx->A, amgx->nLocalRows, amgx->nnz, amgx->values, NULL));
303     PetscCallAmgX(AMGX_solver_resetup(amgx->solver, amgx->A));
304   }
305 
306   if (is_dev_ptrs) {
307     PetscCall(MatSeqAIJCUSPARSERestoreArrayRead(amgx->localA, &amgx->values));
308   } else {
309     PetscCall(MatSeqAIJRestoreArrayRead(amgx->localA, &amgx->values));
310   }
311   amgx_output_messages(amgx);
312   PetscFunctionReturn(0);
313 }
314 
315 /*
316    PCApply_AMGX - Applies the AmgX preconditioner to a vector.
317 
318    Input Parameters:
319 .  pc - the preconditioner context
320 .  b - rhs vector
321 
322    Output Parameter:
323 .  x - solution vector
324 
325    Application Interface Routine: PCApply()
326  */
327 static PetscErrorCode PCApply_AMGX(PC pc, Vec b, Vec x) {
328   PC_AMGX           *amgx = (PC_AMGX *)pc->data;
329   PetscScalar       *x_;
330   const PetscScalar *b_;
331   PetscBool          is_dev_ptrs;
332 
333   PetscFunctionBegin;
334   PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &is_dev_ptrs, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
335 
336   if (is_dev_ptrs) {
337     PetscCall(VecCUDAGetArrayWrite(x, &x_));
338     PetscCall(VecCUDAGetArrayRead(b, &b_));
339   } else {
340     PetscCall(VecGetArrayWrite(x, &x_));
341     PetscCall(VecGetArrayRead(b, &b_));
342   }
343 
344   PetscCallAmgX(AMGX_vector_upload(amgx->sol, amgx->nLocalRows, 1, x_));
345   PetscCallAmgX(AMGX_vector_upload(amgx->rhs, amgx->nLocalRows, 1, b_));
346   PetscCallAmgX(AMGX_solver_solve_with_0_initial_guess(amgx->solver, amgx->rhs, amgx->sol));
347 
348   AMGX_SOLVE_STATUS status;
349   PetscCallAmgX(AMGX_solver_get_status(amgx->solver, &status));
350   PetscCall(PCSetErrorIfFailure(pc, static_cast<PetscBool>(status == AMGX_SOLVE_FAILED)));
351   PetscCheck(status != AMGX_SOLVE_FAILED, amgx->comm, PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! The error code is %d.", status);
352   PetscCallAmgX(AMGX_vector_download(amgx->sol, x_));
353 
354   if (is_dev_ptrs) {
355     PetscCall(VecCUDARestoreArrayWrite(x, &x_));
356     PetscCall(VecCUDARestoreArrayRead(b, &b_));
357   } else {
358     PetscCall(VecRestoreArrayWrite(x, &x_));
359     PetscCall(VecRestoreArrayRead(b, &b_));
360   }
361   amgx_output_messages(amgx);
362   PetscFunctionReturn(0);
363 }
364 
365 static PetscErrorCode PCReset_AMGX(PC pc) {
366   PC_AMGX *amgx = (PC_AMGX *)pc->data;
367 
368   PetscFunctionBegin;
369   if (amgx->solve_state_init) {
370     PetscCallAmgX(AMGX_solver_destroy(amgx->solver));
371     PetscCallAmgX(AMGX_matrix_destroy(amgx->A));
372     PetscCallAmgX(AMGX_vector_destroy(amgx->sol));
373     PetscCallAmgX(AMGX_vector_destroy(amgx->rhs));
374     if (amgx->nranks > 1) PetscCall(MatDestroy(&amgx->localA));
375     amgx_output_messages(amgx);
376     amgx->solve_state_init = false;
377   }
378   PetscFunctionReturn(0);
379 }
380 
381 /*
382    PCDestroy_AMGX - Destroys the private context for the AmgX preconditioner
383    that was created with PCCreate_AMGX().
384 
385    Input Parameter:
386 .  pc - the preconditioner context
387 
388    Application Interface Routine: PCDestroy()
389 */
390 static PetscErrorCode PCDestroy_AMGX(PC pc) {
391   PC_AMGX *amgx = (PC_AMGX *)pc->data;
392 
393   PetscFunctionBegin;
394   /* decrease the number of instances, only the last instance need to destroy resource and finalizing AmgX */
395   if (s_count == 1) {
396     /* can put this in a PCAMGXInitializePackage method */
397     PetscCheck(amgx->rsrc != nullptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "s_rsrc == NULL");
398     PetscCallAmgX(AMGX_resources_destroy(amgx->rsrc));
399     /* destroy config (need to use AMGX_SAFE_CALL after this point) */
400     PetscCallAmgX(AMGX_config_destroy(amgx->cfg));
401     PetscCallAmgX(AMGX_finalize_plugins());
402     PetscCallAmgX(AMGX_finalize());
403     PetscCallMPI(MPI_Comm_free(&amgx->comm));
404   } else {
405     PetscCallAmgX(AMGX_config_destroy(amgx->cfg));
406   }
407   s_count -= 1;
408   PetscCall(PetscFree(amgx));
409   PetscFunctionReturn(0);
410 }
411 
412 template <class T>
413 std::string map_reverse_lookup(const std::map<std::string, T> &map, const T &key) {
414   for (auto const &m : map) {
415     if (m.second == key) { return m.first; }
416   }
417   return "";
418 }
419 
420 static PetscErrorCode PCSetFromOptions_AMGX(PetscOptionItems *PetscOptionsObject, PC pc) {
421   PC_AMGX      *amgx          = (PC_AMGX *)pc->data;
422   constexpr int MAX_PARAM_LEN = 128;
423   char          option[MAX_PARAM_LEN];
424 
425   PetscFunctionBegin;
426   PetscOptionsHeadBegin(PetscOptionsObject, "AmgX options");
427   amgx->cfg_contents = "config_version=2,";
428   amgx->cfg_contents += "determinism_flag=1,";
429 
430   // Set exact coarse solve
431   PetscCall(PetscOptionsBool("-pc_amgx_exact_coarse_solve", "AmgX AMG Exact Coarse Solve", "", amgx->exact_coarse_solve, &amgx->exact_coarse_solve, NULL));
432   if (amgx->exact_coarse_solve) { amgx->cfg_contents += "exact_coarse_solve=1,"; }
433 
434   amgx->cfg_contents += "solver(amg)=AMG,";
435 
436   // Set method
437   std::string def_amg_method = map_reverse_lookup(AmgXControlMap::AMGMethods, amgx->amg_method);
438   PetscCall(PetscStrcpy(option, def_amg_method.c_str()));
439   PetscCall(PetscOptionsString("-pc_amgx_amg_method", "AmgX AMG Method", "", option, option, MAX_PARAM_LEN, NULL));
440   PetscCheck(AmgXControlMap::AMGMethods.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "AMG Method %s not registered for AmgX.", option);
441   amgx->amg_method = AmgXControlMap::AMGMethods.at(option);
442   amgx->cfg_contents += "amg:algorithm=" + std::string(option) + ",";
443 
444   // Set cycle
445   std::string def_amg_cycle = map_reverse_lookup(AmgXControlMap::AMGCycles, amgx->amg_cycle);
446   PetscCall(PetscStrcpy(option, def_amg_cycle.c_str()));
447   PetscCall(PetscOptionsString("-pc_amgx_amg_cycle", "AmgX AMG Cycle", "", option, option, MAX_PARAM_LEN, NULL));
448   PetscCheck(AmgXControlMap::AMGCycles.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "AMG Cycle %s not registered for AmgX.", option);
449   amgx->amg_cycle = AmgXControlMap::AMGCycles.at(option);
450   amgx->cfg_contents += "amg:cycle=" + std::string(option) + ",";
451 
452   // Set smoother
453   std::string def_smoother = map_reverse_lookup(AmgXControlMap::Smoothers, amgx->smoother);
454   PetscCall(PetscStrcpy(option, def_smoother.c_str()));
455   PetscCall(PetscOptionsString("-pc_amgx_smoother", "AmgX Smoother", "", option, option, MAX_PARAM_LEN, NULL));
456   PetscCheck(AmgXControlMap::Smoothers.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Smoother %s not registered for AmgX.", option);
457   amgx->smoother = AmgXControlMap::Smoothers.at(option);
458   amgx->cfg_contents += "amg:smoother(smooth)=" + std::string(option) + ",";
459 
460   if (amgx->smoother == AmgXSmoother::JacobiL1 || amgx->smoother == AmgXSmoother::BlockJacobi) {
461     PetscCall(PetscOptionsScalar("-pc_amgx_jacobi_relaxation_factor", "AmgX AMG Jacobi Relaxation Factor", "", amgx->jacobi_relaxation_factor, &amgx->jacobi_relaxation_factor, NULL));
462     amgx->cfg_contents += "smooth:relaxation_factor=" + std::to_string(amgx->jacobi_relaxation_factor) + ",";
463   } else if (amgx->smoother == AmgXSmoother::GS || amgx->smoother == AmgXSmoother::MulticolorGS) {
464     PetscCall(PetscOptionsScalar("-pc_amgx_gs_symmetric", "AmgX AMG Gauss Seidel Symmetric", "", amgx->gs_symmetric, &amgx->gs_symmetric, NULL));
465     amgx->cfg_contents += "smooth:symmetric_GS=" + std::to_string(amgx->gs_symmetric) + ",";
466   }
467 
468   // Set selector
469   std::string def_selector = map_reverse_lookup(AmgXControlMap::Selectors, amgx->selector);
470   PetscCall(PetscStrcpy(option, def_selector.c_str()));
471   PetscCall(PetscOptionsString("-pc_amgx_selector", "AmgX Selector", "", option, option, MAX_PARAM_LEN, NULL));
472   PetscCheck(AmgXControlMap::Selectors.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Selector %s not registered for AmgX.", option);
473 
474   // Double check that the user has selected an appropriate selector for the AMG method
475   if (amgx->amg_method == AmgXAMGMethod::Classical) {
476     PetscCheck(amgx->selector == AmgXSelector::PMIS || amgx->selector == AmgXSelector::HMIS, amgx->comm, PETSC_ERR_PLIB, "Chosen selector is not used for AmgX Classical AMG: selector=%s", option);
477     amgx->cfg_contents += "amg:interpolator=D2,";
478   } else if (amgx->amg_method == AmgXAMGMethod::Aggregation) {
479     PetscCheck(amgx->selector == AmgXSelector::Size2 || amgx->selector == AmgXSelector::Size4 || amgx->selector == AmgXSelector::Size8 || amgx->selector == AmgXSelector::MultiPairwise, amgx->comm, PETSC_ERR_PLIB, "Chosen selector is not used for AmgX Aggregation AMG");
480   }
481   amgx->selector = AmgXControlMap::Selectors.at(option);
482   amgx->cfg_contents += "amg:selector=" + std::string(option) + ",";
483 
484   // Set presweeps
485   PetscCall(PetscOptionsInt("-pc_amgx_presweeps", "AmgX AMG Presweep Count", "", amgx->presweeps, &amgx->presweeps, NULL));
486   amgx->cfg_contents += "amg:presweeps=" + std::to_string(amgx->presweeps) + ",";
487 
488   // Set postsweeps
489   PetscCall(PetscOptionsInt("-pc_amgx_postsweeps", "AmgX AMG Postsweep Count", "", amgx->postsweeps, &amgx->postsweeps, NULL));
490   amgx->cfg_contents += "amg:postsweeps=" + std::to_string(amgx->postsweeps) + ",";
491 
492   // Set max levels
493   PetscCall(PetscOptionsInt("-pc_amgx_max_levels", "AmgX AMG Max Level Count", "", amgx->max_levels, &amgx->max_levels, NULL));
494   amgx->cfg_contents += "amg:max_levels=100,";
495 
496   // Set dense LU num rows
497   PetscCall(PetscOptionsInt("-pc_amgx_dense_lu_num_rows", "AmgX Dense LU Number of Rows", "", amgx->dense_lu_num_rows, &amgx->dense_lu_num_rows, NULL));
498   amgx->cfg_contents += "amg:dense_lu_num_rows=" + std::to_string(amgx->dense_lu_num_rows) + ",";
499 
500   // Set strength threshold
501   PetscCall(PetscOptionsScalar("-pc_amgx_strength_threshold", "AmgX AMG Strength Threshold", "", amgx->strength_threshold, &amgx->strength_threshold, NULL));
502   amgx->cfg_contents += "amg:strength_threshold=" + std::to_string(amgx->strength_threshold) + ",";
503 
504   // Set aggressive_levels
505   PetscCall(PetscOptionsInt("-pc_amgx_aggressive_levels", "AmgX AMG Presweep Count", "", amgx->aggressive_levels, &amgx->aggressive_levels, NULL));
506   if (amgx->aggressive_levels > 0) { amgx->cfg_contents += "amg:aggressive_levels=" + std::to_string(amgx->aggressive_levels) + ","; }
507 
508   // Set coarse solver
509   std::string def_coarse_solver = map_reverse_lookup(AmgXControlMap::CoarseSolvers, amgx->coarse_solver);
510   PetscCall(PetscStrcpy(option, def_coarse_solver.c_str()));
511   PetscCall(PetscOptionsString("-pc_amgx_coarse_solver", "AmgX CoarseSolver", "", option, option, MAX_PARAM_LEN, NULL));
512   PetscCheck(AmgXControlMap::CoarseSolvers.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "CoarseSolver %s not registered for AmgX.", option);
513   amgx->coarse_solver = AmgXControlMap::CoarseSolvers.at(option);
514   amgx->cfg_contents += "amg:coarse_solver=" + std::string(option) + ",";
515 
516   // Set max iterations
517   amgx->cfg_contents += "amg:max_iters=1,";
518 
519   // Set output control parameters
520   PetscCall(PetscOptionsBool("-pc_amgx_print_grid_stats", "AmgX Print Grid Stats", "", amgx->print_grid_stats, &amgx->print_grid_stats, NULL));
521 
522   if (amgx->print_grid_stats) { amgx->cfg_contents += "amg:print_grid_stats=1,"; }
523   amgx->cfg_contents += "amg:monitor_residual=0";
524 
525   // Set whether AmgX output will be seen
526   PetscCall(PetscOptionsBool("-pc_amgx_verbose", "Enable output from AmgX", "", amgx->verbose, &amgx->verbose, NULL));
527   PetscOptionsHeadEnd();
528   PetscFunctionReturn(0);
529 }
530 
531 static PetscErrorCode PCView_AMGX(PC pc, PetscViewer viewer) {
532   PC_AMGX  *amgx = (PC_AMGX *)pc->data;
533   PetscBool iascii;
534 
535   PetscFunctionBegin;
536   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
537   if (iascii) {
538     std::string output_cfg(amgx->cfg_contents);
539     std::replace(output_cfg.begin(), output_cfg.end(), ',', '\n');
540     PetscCall(PetscViewerASCIIPrintf(viewer, "\n%s\n", output_cfg.c_str()));
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 /*
546    PCCreate_AMGX - Creates a AmgX preconditioner context, PC_AMGX,
547    and sets this as the private data within the generic preconditioning
548    context, PC, that was created within PCCreate().
549 
550    Input Parameter:
551 .  pc - the preconditioner context
552 
553    Application Interface Routine: PCCreate()
554 */
555 
556 /*MC
557      PCAMGX - Interface to NVIDIA's AmgX algebraic multigrid
558 
559    Options Database Keys:
560 +    -pc_amgx_amg_method <CLASSICAL,AGGREGATION> - set the AMG algorithm to use
561 .    -pc_amgx_amg_cycle <V,W,F,CG> - set the AMG cycle type
562 .    -pc_amgx_smoother <PCG,PCGF,PBICGSTAB,GMRES,FGMRES,JACOBI_L1,BLOCK_JACOBI,GS,MULTICOLOR_GS,MULTICOLOR_ILU,MULTICOLOR_DILU,CHEBYSHEV_POLY,NOSOLVER> - set the AMG pre/post smoother
563 .    -pc_amgx_jacobi_relaxation_factor - set the relaxation factor for Jacobi smoothing
564 .    -pc_amgx_gs_symmetric - enforce symmetric Gauss-Seidel smoothing (only applies if GS smoothing is selected)
565 .    -pc_amgx_selector <SIZE_2,SIZE_4,SIZE_8,MULTI_PAIRWISE,PMIS,HMIS> - set the AMG coarse selector
566 .    -pc_amgx_presweeps - set the number of AMG pre-sweeps
567 .    -pc_amgx_postsweeps - set the number of AMG post-sweeps
568 .    -pc_amgx_max_levels - set the maximum number of levels in the AMG level hierarchy
569 .    -pc_amgx_strength_threshold - set the strength threshold for the AMG coarsening
570 .    -pc_amgx_aggressive_levels - set the number of levels (from the finest) that should apply aggressive coarsening
571 .    -pc_amgx_coarse_solver <DENSE_LU_SOLVER,NOSOLVER> - set the coarse solve
572 .    -pc_amgx_print_grid_stats - output the AMG grid hierarchy to stdout
573 -    -pc_amgx_verbose - enable AmgX output
574 
575    Level: intermediate
576 
577    Notes:
578      Preconditioner supplied by the GPU accelerated library AmgX. Implementation will accept host or device pointers, but good performance will require that the KSP is also GPU accelerated so that data is not frequently transferred between host and device.
579 
580 .seealso:  `PCGAMG`, `PCHYPRE`, `PCMG`, `PCAmgXGetResources()`, `PCCreate()`, `PCSetType()`, `PCType` (for list of available types), `PC`
581 M*/
582 
583 PETSC_EXTERN PetscErrorCode PCCreate_AMGX(PC pc) {
584   PC_AMGX *amgx;
585 
586   PetscFunctionBegin;
587   PetscCall(PetscNewLog(pc, &amgx));
588   pc->ops->apply          = PCApply_AMGX;
589   pc->ops->setfromoptions = PCSetFromOptions_AMGX;
590   pc->ops->setup          = PCSetUp_AMGX;
591   pc->ops->view           = PCView_AMGX;
592   pc->ops->destroy        = PCDestroy_AMGX;
593   pc->ops->reset          = PCReset_AMGX;
594   pc->data                = (void *)amgx;
595 
596   // Set the defaults
597   amgx->selector                 = AmgXSelector::PMIS;
598   amgx->smoother                 = AmgXSmoother::BlockJacobi;
599   amgx->amg_method               = AmgXAMGMethod::Classical;
600   amgx->coarse_solver            = AmgXCoarseSolver::DenseLU;
601   amgx->amg_cycle                = AmgXAMGCycle::V;
602   amgx->exact_coarse_solve       = PETSC_TRUE;
603   amgx->presweeps                = 1;
604   amgx->postsweeps               = 1;
605   amgx->max_levels               = 100;
606   amgx->strength_threshold       = 0.5;
607   amgx->aggressive_levels        = 0;
608   amgx->dense_lu_num_rows        = 1;
609   amgx->jacobi_relaxation_factor = 0.9;
610   amgx->gs_symmetric             = PETSC_FALSE;
611   amgx->print_grid_stats         = PETSC_FALSE;
612   amgx->verbose                  = PETSC_FALSE;
613   amgx->rsrc_init                = false;
614   amgx->solve_state_init         = false;
615 
616   s_count++;
617 
618   PetscCallCUDA(cudaGetDevice(&amgx->devID));
619   if (s_count == 1) {
620     PetscCallAmgX(AMGX_initialize());
621     PetscCallAmgX(AMGX_initialize_plugins());
622     PetscCallAmgX(AMGX_register_print_callback(&print_callback));
623     PetscCallAmgX(AMGX_install_signal_handler());
624   }
625   /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
626   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &amgx->comm));
627   PetscCallMPI(MPI_Comm_size(amgx->comm, &amgx->nranks));
628   PetscCallMPI(MPI_Comm_rank(amgx->comm, &amgx->rank));
629 
630   amgx_output_messages(amgx);
631   PetscFunctionReturn(0);
632 }
633 
634 /*@
635    PCAmgXGetResources - get AMGx's internal resource object
636 
637     Not Collective
638 
639    Input Parameters:
640 .  pc - the PC
641 
642    Output Parameter:
643 .  rsrc_out - pointer to the AMGx resource object
644 
645    Level: advanced
646 
647 .seealso: `PCCreate_AMGX()`
648 @*/
649 PETSC_EXTERN PetscErrorCode PCAmgXGetResources(PC pc, void *rsrc_out) {
650   PC_AMGX *amgx = (PC_AMGX *)pc->data;
651 
652   PetscFunctionBegin;
653   if (!amgx->rsrc_init) {
654     // Read configuration file
655     PetscCallAmgX(AMGX_config_create(&amgx->cfg, amgx->cfg_contents.c_str()));
656     PetscCallAmgX(AMGX_resources_create(&amgx->rsrc, amgx->cfg, &amgx->comm, 1, &amgx->devID));
657     amgx->rsrc_init = true;
658   }
659 
660   *static_cast<AMGX_resources_handle *>(rsrc_out) = amgx->rsrc;
661   PetscFunctionReturn(0);
662 }
663