xref: /petsc/src/ksp/pc/impls/amgx/amgx.cxx (revision 87edede185c0c71d403c393b2a06c8dc907b3f11)
1 /*
2      This file implements an AmgX preconditioner in PETSc as part of PC.
3  */
4 
5 /*
6    Include files needed for the AmgX preconditioner:
7      pcimpl.h - private include file intended for use by all preconditioners
8 */
9 
10 #include <petsc/private/pcimpl.h>   /*I "petscpc.h" I*/
11 #include <petscdevice.h>
12 #include <amgx_c.h>
13 #include <limits>
14 #include <vector>
15 #include <algorithm>
16 #include <map>
17 #include <numeric>
18 #include "cuda_runtime.h"
19 
20 enum class AmgXSmoother { PCG, PCGF, PBiCGStab, GMRES, FGMRES, JacobiL1,
21   BlockJacobi, GS, MulticolorGS, MulticolorILU, MulticolorDILU, ChebyshevPoly, NoSolver };
22 enum class AmgXAMGMethod { Classical, Aggregation };
23 enum class AmgXSelector { Size2, Size4, Size8, MultiPairwise, PMIS, HMIS };
24 enum class AmgXCoarseSolver { DenseLU, NoSolver };
25 enum class AmgXAMGCycle { V, W, F, CG, CGF };
26 
27 struct AmgXControlMap
28 {
29   static const std::map<std::string, AmgXAMGMethod> AMGMethods;
30   static const std::map<std::string, AmgXSmoother> Smoothers;
31   static const std::map<std::string, AmgXSelector> Selectors;
32   static const std::map<std::string, AmgXCoarseSolver> CoarseSolvers;
33   static const std::map<std::string, AmgXAMGCycle> AMGCycles;
34 };
35 
36 const std::map<std::string, AmgXAMGMethod> AmgXControlMap::AMGMethods = {
37   { "CLASSICAL", AmgXAMGMethod::Classical },
38   { "AGGREGATION", AmgXAMGMethod::Aggregation }
39 };
40 
41 const std::map<std::string, AmgXSmoother> AmgXControlMap::Smoothers = {
42   { "PCG", AmgXSmoother::PCG },
43   { "PCGF", AmgXSmoother::PCGF },
44   { "PBICGSTAB", AmgXSmoother::PBiCGStab },
45   { "GMRES", AmgXSmoother::GMRES },
46   { "FGMRES", AmgXSmoother::FGMRES },
47   { "JACOBI_L1", AmgXSmoother::JacobiL1 },
48   { "BLOCK_JACOBI", AmgXSmoother::BlockJacobi },
49   { "GS", AmgXSmoother::GS },
50   { "MULTICOLOR_GS", AmgXSmoother::MulticolorGS },
51   { "MULTICOLOR_ILU", AmgXSmoother::MulticolorILU },
52   { "MULTICOLOR_DILU", AmgXSmoother::MulticolorDILU },
53   { "CHEBYSHEV_POLY", AmgXSmoother::ChebyshevPoly },
54   { "NOSOLVER", AmgXSmoother::NoSolver }
55 };
56 
57 const std::map<std::string, AmgXSelector> AmgXControlMap::Selectors = {
58   { "SIZE_2", AmgXSelector::Size2 },
59   { "SIZE_4", AmgXSelector::Size4 },
60   { "SIZE_8", AmgXSelector::Size8 },
61   { "MULTI_PAIRWISE", AmgXSelector::MultiPairwise },
62   { "PMIS", AmgXSelector::PMIS },
63   { "HMIS", AmgXSelector::HMIS }
64 };
65 
66 const std::map<std::string, AmgXCoarseSolver> AmgXControlMap::CoarseSolvers = {
67   { "DENSE_LU_SOLVER", AmgXCoarseSolver::DenseLU },
68   { "NOSOLVER", AmgXCoarseSolver::NoSolver }
69 };
70 
71 const std::map<std::string, AmgXAMGCycle> AmgXControlMap::AMGCycles = {
72   { "V", AmgXAMGCycle::V },
73   { "W", AmgXAMGCycle::W },
74   { "F", AmgXAMGCycle::F },
75   { "CG", AmgXAMGCycle::CG },
76   { "CGF", AmgXAMGCycle::CGF }
77 };
78 
79 /*
80    Private context (data structure) for the AMGX preconditioner.
81 */
82 struct PC_AMGX {
83   AMGX_solver_handle solver;
84   AMGX_config_handle cfg;
85   AMGX_resources_handle rsrc;
86   bool solve_state_init;
87   bool rsrc_init;
88   PetscBool verbose;
89 
90   AMGX_matrix_handle A;
91   AMGX_vector_handle sol;
92   AMGX_vector_handle rhs;
93 
94   MPI_Comm comm;
95   PetscMPIInt rank = 0;
96   PetscMPIInt nranks = 0;
97   int devID = 0;
98 
99   void *lib_handle = 0;
100   std::string cfg_contents;
101 
102   // Cached state for re-setup
103   PetscInt nnz;
104   PetscInt nLocalRows;
105   PetscInt nGlobalRows;
106   PetscInt bSize;
107   Mat localA;
108   const PetscScalar *values;
109 
110   // AMG Control parameters
111   AmgXSmoother smoother;
112   AmgXAMGMethod amg_method;
113   AmgXSelector selector;
114   AmgXCoarseSolver coarse_solver;
115   AmgXAMGCycle amg_cycle;
116   PetscInt presweeps;
117   PetscInt postsweeps;
118   PetscInt max_levels;
119   PetscInt aggressive_levels;
120   PetscInt dense_lu_num_rows;
121   PetscScalar strength_threshold;
122   PetscBool print_grid_stats;
123   PetscBool exact_coarse_solve;
124 
125   // Smoother control parameters
126   PetscScalar jacobi_relaxation_factor;
127   PetscScalar gs_symmetric;
128 };
129 
130 static PetscInt s_count = 0;
131 
132 // Buffer of messages from AmgX
133 // Currently necessary hack before we adapt AmgX to print from single rank only
134 static std::string amgx_output {};
135 
136 // A print callback that allows AmgX to return status messages
137 static void print_callback(const char *msg, int length)
138 {
139   amgx_output.append(msg);
140 }
141 
142 // Outputs messages from the AmgX message buffer and clears it
143 PetscErrorCode amgx_output_messages(PC_AMGX* amgx)
144 {
145   PetscFunctionBegin;
146 
147   // If AmgX output is enabled and we have a message, output it
148   if (amgx->verbose && !amgx_output.empty()) {
149     // Only a single rank to output the AmgX messages
150     PetscCall(PetscPrintf(amgx->comm, "AMGX: %s", amgx_output.c_str()));
151 
152     // Note that all ranks clear their received output
153     amgx_output.clear();
154   }
155 
156   PetscFunctionReturn(0);
157 }
158 
159 // XXX Need to add call in AmgX API that gracefully destroys everything
160 // without abort etc.
161 #define PetscCallAmgX(rc) do { \
162   AMGX_RC err = (rc); \
163   char msg[4096]; \
164   switch (err) { \
165     case AMGX_RC_OK: \
166       break; \
167     default: \
168       AMGX_get_error_string(err, msg, 4096); \
169       SETERRQ(amgx->comm, PETSC_ERR_LIB, "%s", msg); \
170   } \
171 } while (0)
172 
173 /*
174    PCSetUp_AMGX - Prepares for the use of the AmgX preconditioner
175                     by setting data structures and options.
176 
177    Input Parameter:
178 .  pc - the preconditioner context
179 
180    Application Interface Routine: PCSetUp()
181 
182    Notes:
183    The interface routine PCSetUp() is not usually called directly by
184    the user, but instead is called by PCApply() if necessary.
185 */
186 static PetscErrorCode PCSetUp_AMGX(PC pc)
187 {
188   PC_AMGX   *amgx = (PC_AMGX *)pc->data;
189   Mat       Pmat = pc->pmat;
190   PetscBool is_dev_ptrs;
191 
192   PetscFunctionBegin;
193   PetscCall(PetscObjectTypeCompareAny((PetscObject)Pmat, &is_dev_ptrs, MATAIJCUSPARSE, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
194 
195   // At the present time, an AmgX matrix is a sequential matrix
196   // Non-sequential/MPI matrices must be adapted to extract the local matrix
197   bool partial_setup_allowed = (pc->setupcalled && pc->flag != DIFFERENT_NONZERO_PATTERN);
198   if (amgx->nranks > 1) {
199     if (partial_setup_allowed) {
200       PetscCall(MatMPIAIJGetLocalMat(Pmat, MAT_REUSE_MATRIX, &amgx->localA));
201     } else {
202       PetscCall(MatMPIAIJGetLocalMat(Pmat, MAT_INITIAL_MATRIX, &amgx->localA));
203     }
204 
205     if (is_dev_ptrs) {
206       PetscCall(MatConvert(amgx->localA,MATSEQAIJCUSPARSE,MAT_INPLACE_MATRIX,&amgx->localA));
207     }
208   } else {
209     amgx->localA = Pmat;
210   }
211 
212   if (is_dev_ptrs) {
213     PetscCall(MatSeqAIJCUSPARSEGetArrayRead(amgx->localA, &amgx->values));
214   } else {
215     PetscCall(MatSeqAIJGetArrayRead(amgx->localA, &amgx->values));
216   }
217 
218   if (!partial_setup_allowed) {
219     // Initialise resources and matrices
220     if (!amgx->rsrc_init) {
221       // Read configuration file
222       PetscCallAmgX(AMGX_config_create(&amgx->cfg, amgx->cfg_contents.c_str()));
223       PetscCallAmgX(AMGX_resources_create(&amgx->rsrc, amgx->cfg, &amgx->comm, 1, &amgx->devID));
224       amgx->rsrc_init = true;
225     }
226 
227     PetscCheck(!amgx->solve_state_init, amgx->comm, PETSC_ERR_PLIB, "AmgX solve state initialisation already called.");
228     PetscCallAmgX(AMGX_matrix_create(&amgx->A, amgx->rsrc, AMGX_mode_dDDI));
229     PetscCallAmgX(AMGX_vector_create(&amgx->sol, amgx->rsrc, AMGX_mode_dDDI));
230     PetscCallAmgX(AMGX_vector_create(&amgx->rhs, amgx->rsrc, AMGX_mode_dDDI));
231     PetscCallAmgX(AMGX_solver_create(&amgx->solver, amgx->rsrc, AMGX_mode_dDDI, amgx->cfg));
232     amgx->solve_state_init = true;
233 
234     // Extract the CSR data
235     PetscBool      done;
236     const PetscInt *colIndices;
237     const PetscInt *rowOffsets;
238     PetscCall(MatGetRowIJ(amgx->localA, 0, PETSC_FALSE, PETSC_FALSE, &amgx->nLocalRows, &rowOffsets, &colIndices, &done));
239     PetscCheck(done, amgx->comm, PETSC_ERR_PLIB, "MatGetRowIJ was not successful");
240     PetscCheck(amgx->nLocalRows < std::numeric_limits<int>::max(), PETSC_COMM_SELF,PETSC_ERR_PLIB, "AmgX restricted to int local rows but nLocalRows = %" PetscInt_FMT " > max<int>", amgx->nLocalRows);
241 
242     if (is_dev_ptrs) {
243       PetscCallCUDA(cudaMemcpy(&amgx->nnz, &rowOffsets[amgx->nLocalRows], sizeof(int), cudaMemcpyDefault));
244     } else {
245       amgx->nnz = rowOffsets[amgx->nLocalRows];
246     }
247 
248     PetscCheck(amgx->nnz < std::numeric_limits<int>::max(), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Support for 64-bit integer nnz not yet implemented, nnz = %" PetscInt_FMT ".", amgx->nnz);
249 
250     // Allocate space for some partition offsets
251     std::vector<PetscInt> partitionOffsets(amgx->nranks + 1);
252 
253     // Fetch the number of local rows per rank
254     partitionOffsets[0] = 0; /* could use PetscLayoutGetRanges */
255     PetscCallMPI(MPI_Allgather(&amgx->nLocalRows, 1, MPIU_INT, partitionOffsets.data()+1, 1, MPIU_INT, amgx->comm));
256     std::partial_sum(partitionOffsets.begin(), partitionOffsets.end(), partitionOffsets.begin());
257 
258     // Fetch the number of global rows
259     amgx->nGlobalRows = partitionOffsets[amgx->nranks];
260 
261     PetscCall(MatGetBlockSize(Pmat, &amgx->bSize));
262 
263     // XXX Currently constrained to 32-bit indices, to be changed in the future
264     // Create the distribution and upload the matrix data
265     AMGX_distribution_handle dist;
266     PetscCallAmgX(AMGX_distribution_create(&dist, amgx->cfg));
267     PetscCallAmgX(AMGX_distribution_set_32bit_colindices(dist, true));
268     PetscCallAmgX(AMGX_distribution_set_partition_data(dist, AMGX_DIST_PARTITION_OFFSETS, partitionOffsets.data()));
269     PetscCallAmgX(AMGX_matrix_upload_distributed(amgx->A, amgx->nGlobalRows, (int)amgx->nLocalRows, (int)amgx->nnz, amgx->bSize, amgx->bSize, rowOffsets, colIndices, amgx->values, NULL, dist));
270     PetscCallAmgX(AMGX_solver_setup(amgx->solver, amgx->A));
271     PetscCallAmgX(AMGX_vector_bind(amgx->sol, amgx->A));
272     PetscCallAmgX(AMGX_vector_bind(amgx->rhs, amgx->A));
273 
274     PetscInt nlr = 0;
275     PetscCall(MatRestoreRowIJ(amgx->localA, 0, PETSC_FALSE, PETSC_FALSE, &nlr, &rowOffsets, &colIndices, &done));
276   } else {
277     // The fast path for if the sparsity pattern persists
278     PetscCallAmgX(AMGX_matrix_replace_coefficients(amgx->A, amgx->nLocalRows, amgx->nnz, amgx->values, NULL));
279     PetscCallAmgX(AMGX_solver_resetup(amgx->solver, amgx->A));
280   }
281 
282   if (is_dev_ptrs) {
283     PetscCall(MatSeqAIJCUSPARSERestoreArrayRead(amgx->localA, &amgx->values));
284   } else {
285     PetscCall(MatSeqAIJRestoreArrayRead(amgx->localA, &amgx->values));
286   }
287   amgx_output_messages(amgx);
288   PetscFunctionReturn(0);
289 }
290 
291 /*
292    PCApply_AMGX - Applies the AmgX preconditioner to a vector.
293 
294    Input Parameters:
295 .  pc - the preconditioner context
296 .  b - rhs vector
297 
298    Output Parameter:
299 .  x - solution vector
300 
301    Application Interface Routine: PCApply()
302  */
303 static PetscErrorCode PCApply_AMGX(PC pc, Vec b, Vec x)
304 {
305   PC_AMGX           *amgx = (PC_AMGX *)pc->data;
306   PetscScalar       *x_;
307   const PetscScalar *b_;
308   PetscBool         is_dev_ptrs;
309 
310   PetscFunctionBegin;
311   PetscCall(PetscObjectTypeCompareAny((PetscObject)x,&is_dev_ptrs,VECCUDA,VECMPICUDA,VECSEQCUDA,""));
312 
313   if (is_dev_ptrs) {
314     PetscCall(VecCUDAGetArrayWrite(x, &x_));
315     PetscCall(VecCUDAGetArrayRead(b, &b_));
316   } else {
317     PetscCall(VecGetArrayWrite(x, &x_));
318     PetscCall(VecGetArrayRead(b, &b_));
319   }
320 
321   PetscCallAmgX(AMGX_vector_upload(amgx->sol, amgx->nLocalRows, 1, x_));
322   PetscCallAmgX(AMGX_vector_upload(amgx->rhs, amgx->nLocalRows, 1, b_));
323   PetscCallAmgX(AMGX_solver_solve_with_0_initial_guess(amgx->solver, amgx->rhs, amgx->sol));
324 
325   AMGX_SOLVE_STATUS status;
326   PetscCallAmgX(AMGX_solver_get_status(amgx->solver, &status));
327   PetscCall(PCSetErrorIfFailure(pc, static_cast<PetscBool>(status == AMGX_SOLVE_FAILED)));
328   PetscCheck(status != AMGX_SOLVE_FAILED, amgx->comm, PETSC_ERR_CONV_FAILED, "AmgX solver failed to solve the system! The error code is %d.", status);
329   PetscCallAmgX(AMGX_vector_download(amgx->sol, x_));
330 
331   if (is_dev_ptrs) {
332     PetscCall(VecCUDARestoreArrayWrite(x, &x_));
333     PetscCall(VecCUDARestoreArrayRead(b, &b_));
334   } else {
335     PetscCall(VecRestoreArrayWrite(x, &x_));
336     PetscCall(VecRestoreArrayRead(b, &b_));
337   }
338   amgx_output_messages(amgx);
339   PetscFunctionReturn(0);
340 }
341 
342 static PetscErrorCode PCReset_AMGX(PC pc)
343 {
344   PC_AMGX *amgx = (PC_AMGX *)pc->data;
345 
346   PetscFunctionBegin;
347   if (amgx->solve_state_init) {
348     PetscCallAmgX(AMGX_solver_destroy(amgx->solver));
349     PetscCallAmgX(AMGX_matrix_destroy(amgx->A));
350     PetscCallAmgX(AMGX_vector_destroy(amgx->sol));
351     PetscCallAmgX(AMGX_vector_destroy(amgx->rhs));
352     if (amgx->nranks > 1) PetscCall(MatDestroy(&amgx->localA));
353     amgx_output_messages(amgx);
354     amgx->solve_state_init = false;
355   }
356   PetscFunctionReturn(0);
357 }
358 
359 /*
360    PCDestroy_AMGX - Destroys the private context for the AmgX preconditioner
361    that was created with PCCreate_AMGX().
362 
363    Input Parameter:
364 .  pc - the preconditioner context
365 
366    Application Interface Routine: PCDestroy()
367 */
368 static PetscErrorCode PCDestroy_AMGX(PC pc)
369 {
370   PC_AMGX *amgx = (PC_AMGX *)pc->data;
371 
372   PetscFunctionBegin;
373   /* decrease the number of instances, only the last instance need to destroy resource and finalizing AmgX */
374   if (s_count == 1) {
375     /* can put this in a PCAMGXInitializePackage method */
376     PetscCheck(amgx->rsrc != nullptr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "s_rsrc == NULL");
377     PetscCallAmgX(AMGX_resources_destroy(amgx->rsrc));
378     /* destroy config (need to use AMGX_SAFE_CALL after this point) */
379     PetscCallAmgX(AMGX_config_destroy(amgx->cfg));
380     PetscCallAmgX(AMGX_finalize_plugins());
381     PetscCallAmgX(AMGX_finalize());
382     PetscCallMPI(MPI_Comm_free(&amgx->comm));
383   } else {
384     PetscCallAmgX(AMGX_config_destroy(amgx->cfg));
385   }
386   s_count -= 1;
387   PetscCall(PetscFree(amgx));
388   PetscFunctionReturn(0);
389 }
390 
391 template <class T>
392 std::string map_reverse_lookup(const std::map<std::string, T>& map, const T& key)
393 {
394   for (auto const& m : map) {
395     if (m.second == key) {
396       return m.first;
397     }
398   }
399   return "";
400 }
401 
402 static PetscErrorCode PCSetFromOptions_AMGX(PetscOptionItems *PetscOptionsObject,PC pc)
403 {
404   PC_AMGX *amgx = (PC_AMGX *)pc->data;
405   constexpr int MAX_PARAM_LEN = 128;
406   char option[MAX_PARAM_LEN];
407 
408   PetscFunctionBegin;
409   PetscOptionsHeadBegin(PetscOptionsObject, "AmgX options");
410   amgx->cfg_contents = "config_version=2,";
411   amgx->cfg_contents += "determinism_flag=1,";
412 
413   // Set exact coarse solve
414   PetscCall(PetscOptionsBool("-pc_amgx_exact_coarse_solve", "AmgX AMG Exact Coarse Solve", "", amgx->exact_coarse_solve, &amgx->exact_coarse_solve, NULL));
415   if (amgx->exact_coarse_solve) {
416     amgx->cfg_contents += "exact_coarse_solve=1,";
417   }
418 
419   amgx->cfg_contents += "solver(amg)=AMG,";
420 
421   // Set method
422   std::string def_amg_method = map_reverse_lookup(AmgXControlMap::AMGMethods, amgx->amg_method);
423   PetscCall(PetscStrcpy(option, def_amg_method.c_str()));
424   PetscCall(PetscOptionsString("-pc_amgx_amg_method", "AmgX AMG Method", "", option, option, MAX_PARAM_LEN, NULL));
425   PetscCheck(AmgXControlMap::AMGMethods.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "AMG Method %s not registered for AmgX.", option);
426   amgx->amg_method = AmgXControlMap::AMGMethods.at(option);
427   amgx->cfg_contents += "amg:algorithm=" + std::string(option) + ",";
428 
429   // Set cycle
430   std::string def_amg_cycle = map_reverse_lookup(AmgXControlMap::AMGCycles, amgx->amg_cycle);
431   PetscCall(PetscStrcpy(option, def_amg_cycle.c_str()));
432   PetscCall(PetscOptionsString("-pc_amgx_amg_cycle", "AmgX AMG Cycle", "", option, option, MAX_PARAM_LEN, NULL));
433   PetscCheck(AmgXControlMap::AMGCycles.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "AMG Cycle %s not registered for AmgX.", option);
434   amgx->amg_cycle = AmgXControlMap::AMGCycles.at(option);
435   amgx->cfg_contents += "amg:cycle=" + std::string(option) + ",";
436 
437   // Set smoother
438   std::string def_smoother = map_reverse_lookup(AmgXControlMap::Smoothers, amgx->smoother);
439   PetscCall(PetscStrcpy(option, def_smoother.c_str()));
440   PetscCall(PetscOptionsString("-pc_amgx_smoother", "AmgX Smoother", "", option, option, MAX_PARAM_LEN, NULL));
441   PetscCheck(AmgXControlMap::Smoothers.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Smoother %s not registered for AmgX.", option);
442   amgx->smoother = AmgXControlMap::Smoothers.at(option);
443   amgx->cfg_contents += "amg:smoother(smooth)=" + std::string(option) + ",";
444 
445   if (amgx->smoother == AmgXSmoother::JacobiL1 || amgx->smoother == AmgXSmoother::BlockJacobi) {
446     PetscCall(PetscOptionsScalar("-pc_amgx_jacobi_relaxation_factor", "AmgX AMG Jacobi Relaxation Factor", "", amgx->jacobi_relaxation_factor, &amgx->jacobi_relaxation_factor, NULL));
447     amgx->cfg_contents += "smooth:relaxation_factor=" + std::to_string(amgx->jacobi_relaxation_factor) + ",";
448   } else if (amgx->smoother == AmgXSmoother::GS || amgx->smoother == AmgXSmoother::MulticolorGS) {
449     PetscCall(PetscOptionsScalar("-pc_amgx_gs_symmetric", "AmgX AMG Gauss Seidel Symmetric", "", amgx->gs_symmetric, &amgx->gs_symmetric, NULL));
450     amgx->cfg_contents += "smooth:symmetric_GS=" + std::to_string(amgx->gs_symmetric) + ",";
451   }
452 
453   // Set selector
454   std::string def_selector = map_reverse_lookup(AmgXControlMap::Selectors, amgx->selector);
455   PetscCall(PetscStrcpy(option, def_selector.c_str()));
456   PetscCall(PetscOptionsString("-pc_amgx_selector", "AmgX Selector", "", option, option, MAX_PARAM_LEN, NULL));
457   PetscCheck(AmgXControlMap::Selectors.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Selector %s not registered for AmgX.", option);
458 
459   // Double check that the user has selected an appropriate selector for the AMG method
460   if (amgx->amg_method == AmgXAMGMethod::Classical) {
461     PetscCheck(amgx->selector == AmgXSelector::PMIS || amgx->selector == AmgXSelector::HMIS, amgx->comm, PETSC_ERR_PLIB, "Chosen selector is not used for AmgX Classical AMG: selector=%s", option);
462     amgx->cfg_contents += "amg:interpolator=D2,";
463   } else if (amgx->amg_method == AmgXAMGMethod::Aggregation) {
464     PetscCheck(amgx->selector == AmgXSelector::Size2 || amgx->selector == AmgXSelector::Size4 || amgx->selector == AmgXSelector::Size8 || amgx->selector == AmgXSelector::MultiPairwise, amgx->comm, PETSC_ERR_PLIB, "Chosen selector is not used for AmgX Aggregation AMG");
465   }
466   amgx->selector = AmgXControlMap::Selectors.at(option);
467   amgx->cfg_contents += "amg:selector=" + std::string(option) + ",";
468 
469   // Set presweeps
470   PetscCall(PetscOptionsInt("-pc_amgx_presweeps", "AmgX AMG Presweep Count", "", amgx->presweeps, &amgx->presweeps, NULL));
471   amgx->cfg_contents += "amg:presweeps=" + std::to_string(amgx->presweeps) + ",";
472 
473   // Set postsweeps
474   PetscCall(PetscOptionsInt("-pc_amgx_postsweeps", "AmgX AMG Postsweep Count", "", amgx->postsweeps, &amgx->postsweeps, NULL));
475   amgx->cfg_contents += "amg:postsweeps=" + std::to_string(amgx->postsweeps) + ",";
476 
477   // Set max levels
478   PetscCall(PetscOptionsInt("-pc_amgx_max_levels", "AmgX AMG Max Level Count", "", amgx->max_levels, &amgx->max_levels, NULL));
479   amgx->cfg_contents += "amg:max_levels=100,";
480 
481   // Set dense LU num rows
482   PetscCall(PetscOptionsInt("-pc_amgx_dense_lu_num_rows", "AmgX Dense LU Number of Rows", "", amgx->dense_lu_num_rows, &amgx->dense_lu_num_rows, NULL));
483   amgx->cfg_contents += "amg:dense_lu_num_rows=" + std::to_string(amgx->dense_lu_num_rows) + ",";
484 
485   // Set strength threshold
486   PetscCall(PetscOptionsScalar("-pc_amgx_strength_threshold", "AmgX AMG Strength Threshold", "", amgx->strength_threshold, &amgx->strength_threshold, NULL));
487   amgx->cfg_contents += "amg:strength_threshold=" + std::to_string(amgx->strength_threshold) + ",";
488 
489   // Set aggressive_levels
490   PetscCall(PetscOptionsInt("-pc_amgx_aggressive_levels", "AmgX AMG Presweep Count", "", amgx->aggressive_levels, &amgx->aggressive_levels, NULL));
491   if (amgx->aggressive_levels > 0) {
492     amgx->cfg_contents += "amg:aggressive_levels=" + std::to_string(amgx->aggressive_levels) + ",";
493   }
494 
495   // Set coarse solver
496   std::string def_coarse_solver = map_reverse_lookup(AmgXControlMap::CoarseSolvers, amgx->coarse_solver);
497   PetscCall(PetscStrcpy(option, def_coarse_solver.c_str()));
498   PetscCall(PetscOptionsString("-pc_amgx_coarse_solver", "AmgX CoarseSolver", "", option, option, MAX_PARAM_LEN, NULL));
499   PetscCheck(AmgXControlMap::CoarseSolvers.count(option) == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "CoarseSolver %s not registered for AmgX.", option);
500   amgx->coarse_solver = AmgXControlMap::CoarseSolvers.at(option);
501   amgx->cfg_contents += "amg:coarse_solver=" + std::string(option) + ",";
502 
503   // Set max iterations
504   amgx->cfg_contents += "amg:max_iters=1,";
505 
506   // Set output control parameters
507   PetscCall(PetscOptionsBool("-pc_amgx_print_grid_stats", "AmgX Print Grid Stats", "", amgx->print_grid_stats, &amgx->print_grid_stats, NULL));
508 
509   if (amgx->print_grid_stats) {
510     amgx->cfg_contents += "amg:print_grid_stats=1,";
511   }
512   amgx->cfg_contents += "amg:monitor_residual=0";
513 
514   // Set whether AmgX output will be seen
515   PetscCall(PetscOptionsBool("-pc_amgx_verbose", "Enable output from AmgX", "", amgx->verbose, &amgx->verbose, NULL));
516   PetscOptionsHeadEnd();
517   PetscFunctionReturn(0);
518 }
519 
520 static PetscErrorCode PCView_AMGX(PC pc, PetscViewer viewer)
521 {
522   PC_AMGX *amgx = (PC_AMGX *)pc->data;
523   PetscBool iascii;
524 
525   PetscFunctionBegin;
526   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
527   if (iascii) {
528     std::string output_cfg(amgx->cfg_contents);
529     std::replace(output_cfg.begin(), output_cfg.end(), ',', '\n');
530     PetscCall(PetscViewerASCIIPrintf(viewer, "\n%s\n", output_cfg.c_str()));
531   }
532   PetscFunctionReturn(0);
533 }
534 
535 /*
536    PCCreate_AMGX - Creates a AmgX preconditioner context, PC_AMGX,
537    and sets this as the private data within the generic preconditioning
538    context, PC, that was created within PCCreate().
539 
540    Input Parameter:
541 .  pc - the preconditioner context
542 
543    Application Interface Routine: PCCreate()
544 */
545 
546 /*MC
547      PCAMGX - Interface to NVIDIA's AmgX algebraic multigrid
548 
549    Options Database Keys:
550 +    -pc_amgx_amg_method <CLASSICAL,AGGREGATION> - set the AMG algorithm to use
551 .    -pc_amgx_amg_cycle <V,W,F,CG> - set the AMG cycle type
552 .    -pc_amgx_smoother <PCG,PCGF,PBICGSTAB,GMRES,FGMRES,JACOBI_L1,BLOCK_JACOBI,GS,MULTICOLOR_GS,MULTICOLOR_ILU,MULTICOLOR_DILU,CHEBYSHEV_POLY,NOSOLVER> - set the AMG pre/post smoother
553 .    -pc_amgx_jacobi_relaxation_factor - set the relaxation factor for Jacobi smoothing
554 .    -pc_amgx_gs_symmetric - enforce symmetric Gauss-Seidel smoothing (only applies if GS smoothing is selected)
555 .    -pc_amgx_selector <SIZE_2,SIZE_4,SIZE_8,MULTI_PAIRWISE,PMIS,HMIS> - set the AMG coarse selector
556 .    -pc_amgx_presweeps - set the number of AMG pre-sweeps
557 .    -pc_amgx_postsweeps - set the number of AMG post-sweeps
558 .    -pc_amgx_max_levels - set the maximum number of levels in the AMG level hierarchy
559 .    -pc_amgx_strength_threshold - set the strength threshold for the AMG coarsening
560 .    -pc_amgx_aggressive_levels - set the number of levels (from the finest) that should apply aggressive coarsening
561 .    -pc_amgx_coarse_solver <DENSE_LU_SOLVER,NOSOLVER> - set the coarse solve
562 .    -pc_amgx_print_grid_stats - output the AMG grid hierarchy to stdout
563 -    -pc_amgx_verbose - enable AmgX output
564 
565    Level: intermediate
566 
567    Notes:
568      Preconditioner supplied by the GPU accelerated library AmgX. Implementation will accept host or device pointers, but good performance will require that the KSP is also GPU accelerated so that data is not frequently transferred between host and device.
569 
570 .seealso:  `PCGAMG`, `PCHYPRE`, `PCMG`, `PCAmgXGetResources()`, `PCCreate()`, `PCSetType()`, `PCType` (for list of available types), `PC`
571 M*/
572 
573 PETSC_EXTERN PetscErrorCode PCCreate_AMGX(PC pc)
574 {
575   PC_AMGX *amgx;
576 
577   PetscFunctionBegin;
578   PetscCall(PetscNewLog(pc, &amgx));
579   pc->ops->apply = PCApply_AMGX;
580   pc->ops->setfromoptions = PCSetFromOptions_AMGX;
581   pc->ops->setup = PCSetUp_AMGX;
582   pc->ops->view = PCView_AMGX;
583   pc->ops->destroy = PCDestroy_AMGX;
584   pc->ops->reset = PCReset_AMGX;
585   pc->data = (void *)amgx;
586 
587   // Set the defaults
588   amgx->selector = AmgXSelector::PMIS;
589   amgx->smoother = AmgXSmoother::BlockJacobi;
590   amgx->amg_method = AmgXAMGMethod::Classical;
591   amgx->coarse_solver = AmgXCoarseSolver::DenseLU;
592   amgx->amg_cycle = AmgXAMGCycle::V;
593   amgx->exact_coarse_solve = PETSC_TRUE;
594   amgx->presweeps = 1;
595   amgx->postsweeps = 1;
596   amgx->max_levels = 100;
597   amgx->strength_threshold = 0.5;
598   amgx->aggressive_levels = 0;
599   amgx->dense_lu_num_rows = 1;
600   amgx->jacobi_relaxation_factor = 0.9;
601   amgx->gs_symmetric = PETSC_FALSE;
602   amgx->print_grid_stats = PETSC_FALSE;
603   amgx->verbose = PETSC_FALSE;
604   amgx->rsrc_init = false;
605   amgx->solve_state_init = false;
606 
607   s_count++;
608 
609   PetscCallCUDA(cudaGetDevice(&amgx->devID));
610   if (s_count == 1) {
611     PetscCallAmgX(AMGX_initialize());
612     PetscCallAmgX(AMGX_initialize_plugins());
613     PetscCallAmgX(AMGX_register_print_callback(&print_callback));
614     PetscCallAmgX(AMGX_install_signal_handler());
615   }
616   /* This communicator is not yet known to this system, so we duplicate it and make an internal communicator */
617   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &amgx->comm));
618   PetscCallMPI(MPI_Comm_size(amgx->comm, &amgx->nranks));
619   PetscCallMPI(MPI_Comm_rank(amgx->comm, &amgx->rank));
620 
621   amgx_output_messages(amgx);
622   PetscFunctionReturn(0);
623 }
624 
625 /*@
626    PCAmgXGetResources - get AMGx's internal resource object
627 
628     Not Collective
629 
630    Input Parameters:
631 .  pc - the PC
632 
633    Output Parameter:
634 .  rsrc_out - pointer to the AMGx resource object
635 
636    Level: advanced
637 
638 .seealso: `PCCreate_AMGX()`
639 @*/
640 PETSC_EXTERN PetscErrorCode PCAmgXGetResources(PC pc, void* rsrc_out)
641 {
642   PC_AMGX *amgx = (PC_AMGX *)pc->data;
643 
644   PetscFunctionBegin;
645   if (!amgx->rsrc_init) {
646     // Read configuration file
647     PetscCallAmgX(AMGX_config_create(&amgx->cfg, amgx->cfg_contents.c_str()));
648     PetscCallAmgX(AMGX_resources_create(&amgx->rsrc, amgx->cfg, &amgx->comm, 1, &amgx->devID));
649     amgx->rsrc_init = true;
650   }
651 
652   *static_cast<AMGX_resources_handle*>(rsrc_out) = amgx->rsrc;
653   PetscFunctionReturn(0);
654 }
655 
656