xref: /petsc/src/ksp/ksp/impls/gmres/pgmres/pgmres.c (revision d529f056d75cd16b380adc3e50ccb741df53d0de)
1 
2 /*
3     This file implements PGMRES (a Pipelined Generalized Minimal Residual method)
4 */
5 
6 #include <../src/ksp/ksp/impls/gmres/pgmres/pgmresimpl.h> /*I  "petscksp.h"  I*/
7 
8 static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
9 static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
10 
11 static PetscErrorCode KSPSetUp_PGMRES(KSP ksp)
12 {
13   PetscFunctionBegin;
14   PetscCall(KSPSetUp_GMRES(ksp));
15   PetscFunctionReturn(PETSC_SUCCESS);
16 }
17 
18 static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount, KSP ksp)
19 {
20   KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data);
21   PetscReal   res_norm, res, newnorm;
22   PetscInt    it     = 0, j, k;
23   PetscBool   hapend = PETSC_FALSE;
24 
25   PetscFunctionBegin;
26   if (itcount) *itcount = 0;
27   PetscCall(VecNormalize(VEC_VV(0), &res_norm));
28   KSPCheckNorm(ksp, res_norm);
29   res    = res_norm;
30   *RS(0) = res_norm;
31 
32   /* check for the convergence */
33   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
34   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
35   else ksp->rnorm = 0;
36   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
37   pgmres->it = it - 2;
38   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
39   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
40   if (!res) {
41     ksp->reason = KSP_CONVERGED_ATOL;
42     PetscCall(PetscInfo(ksp, "Converged due to zero residual norm on entry\n"));
43     PetscFunctionReturn(PETSC_SUCCESS);
44   }
45 
46   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
47   for (; !ksp->reason; it++) {
48     Vec Zcur, Znext;
49     if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) PetscCall(KSPGMRESGetNewVectors(ksp, it + 1));
50     /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */
51     Zcur  = VEC_VV(it);     /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */
52     Znext = VEC_VV(it + 1); /* This iteration will compute Znext, update with a deferred correction once we know how
53                                Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */
54 
55     if (it < pgmres->max_k + 1 && ksp->its + 1 < PetscMax(2, ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */
56       PetscCall(KSP_PCApplyBAorAB(ksp, Zcur, Znext, VEC_TEMP_MATOP));
57     }
58 
59     if (it > 1) { /* Complete the pending reduction */
60       PetscCall(VecNormEnd(VEC_VV(it - 1), NORM_2, &newnorm));
61       *HH(it - 1, it - 2) = newnorm;
62     }
63     if (it > 0) { /* Finish the reduction computing the latest column of H */
64       PetscCall(VecMDotEnd(Zcur, it, &(VEC_VV(0)), HH(0, it - 1)));
65     }
66 
67     if (it > 1) {
68       /* normalize the base vector from two iterations ago, basis is complete up to here */
69       PetscCall(VecScale(VEC_VV(it - 1), 1. / *HH(it - 1, it - 2)));
70 
71       PetscCall(KSPPGMRESUpdateHessenberg(ksp, it - 2, &hapend, &res));
72       pgmres->it = it - 2;
73       ksp->its++;
74       if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res;
75       else ksp->rnorm = 0;
76 
77       PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
78       if (it < pgmres->max_k + 1 || ksp->reason || ksp->its == ksp->max_it) { /* Monitor if we are done or still iterating, but not before a restart. */
79         PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
80         PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
81       }
82       if (ksp->reason) break;
83       /* Catch error in happy breakdown and signal convergence and break from loop */
84       if (hapend) {
85         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res);
86         ksp->reason = KSP_DIVERGED_BREAKDOWN;
87         break;
88       }
89 
90       if (!(it < pgmres->max_k + 1 && ksp->its < ksp->max_it)) break;
91 
92       /* The it-2 column of H was not scaled when we computed Zcur, apply correction */
93       PetscCall(VecScale(Zcur, 1. / *HH(it - 1, it - 2)));
94       /* And Znext computed in this iteration was computed using the under-scaled Zcur */
95       PetscCall(VecScale(Znext, 1. / *HH(it - 1, it - 2)));
96 
97       /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */
98       for (k = 0; k < it; k++) *HH(k, it - 1) /= *HH(it - 1, it - 2);
99       /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This
100        * column is complete except for HH(it,it-1) which we won't know until the next iteration. */
101       *HH(it - 1, it - 1) /= *HH(it - 1, it - 2);
102     }
103 
104     if (it > 0) {
105       PetscScalar *work;
106       if (!pgmres->orthogwork) PetscCall(PetscMalloc1(pgmres->max_k + 2, &pgmres->orthogwork));
107       work = pgmres->orthogwork;
108       /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is
109        *
110        *   Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1]
111        *
112        * where
113        *
114        *   Z[j] = sum_{k=0}^j V[k] * H[k,j-1]
115        *
116        * substituting
117        *
118        *   Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1]
119        *
120        * rearranging the iteration space from row-column to column-row
121        *
122        *   Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1]
123        *
124        * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already
125        * been transformed to upper triangular form.
126        */
127       for (k = 0; k < it + 1; k++) {
128         work[k] = 0;
129         for (j = PetscMax(0, k - 1); j < it - 1; j++) work[k] -= *HES(k, j) * *HH(j, it - 1);
130       }
131       PetscCall(VecMAXPY(Znext, it + 1, work, &VEC_VV(0)));
132       PetscCall(VecAXPY(Znext, -*HH(it - 1, it - 1), Zcur));
133 
134       /* Orthogonalize Zcur against existing basis vectors. */
135       for (k = 0; k < it; k++) work[k] = -*HH(k, it - 1);
136       PetscCall(VecMAXPY(Zcur, it, work, &VEC_VV(0)));
137       /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */
138       /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */
139       PetscCall(VecNormBegin(VEC_VV(it), NORM_2, &newnorm));
140     }
141 
142     /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */
143     PetscCall(VecMDotBegin(Znext, it + 1, &VEC_VV(0), HH(0, it)));
144 
145     /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */
146     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext)));
147   }
148   if (itcount) *itcount = it - 1; /* Number of iterations actually completed. */
149 
150   /*
151     Solve for the "best" coefficients of the Krylov
152     columns, add the solution values together, and possibly unwind the preconditioning from the solution
153    */
154   /* Form the solution (or the solution so far) */
155   PetscCall(KSPPGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, it - 2));
156   PetscFunctionReturn(PETSC_SUCCESS);
157 }
158 
159 static PetscErrorCode KSPSolve_PGMRES(KSP ksp)
160 {
161   PetscInt    its, itcount;
162   KSP_PGMRES *pgmres     = (KSP_PGMRES *)ksp->data;
163   PetscBool   guess_zero = ksp->guess_zero;
164 
165   PetscFunctionBegin;
166   PetscCheck(!ksp->calc_sings || pgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
167   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
168   ksp->its = 0;
169   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
170 
171   itcount     = 0;
172   ksp->reason = KSP_CONVERGED_ITERATING;
173   while (!ksp->reason) {
174     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
175     PetscCall(KSPPGMRESCycle(&its, ksp));
176     itcount += its;
177     if (itcount >= ksp->max_it) {
178       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
179       break;
180     }
181     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
182   }
183   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
184   PetscFunctionReturn(PETSC_SUCCESS);
185 }
186 
187 static PetscErrorCode KSPDestroy_PGMRES(KSP ksp)
188 {
189   PetscFunctionBegin;
190   PetscCall(KSPDestroy_GMRES(ksp));
191   PetscFunctionReturn(PETSC_SUCCESS);
192 }
193 
194 static PetscErrorCode KSPPGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it)
195 {
196   PetscScalar tt;
197   PetscInt    k, j;
198   KSP_PGMRES *pgmres = (KSP_PGMRES *)(ksp->data);
199 
200   PetscFunctionBegin;
201   /* Solve for solution vector that minimizes the residual */
202 
203   if (it < 0) {                        /* no pgmres steps have been performed */
204     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
205     PetscFunctionReturn(PETSC_SUCCESS);
206   }
207 
208   /* solve the upper triangular system - RS is the right side and HH is
209      the upper triangular matrix  - put soln in nrs */
210   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
211   else nrs[it] = 0.0;
212 
213   for (k = it - 1; k >= 0; k--) {
214     tt = *RS(k);
215     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
216     nrs[k] = tt / *HH(k, k);
217   }
218 
219   /* Accumulate the correction to the solution of the preconditioned problem in TEMP */
220   PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &VEC_VV(0)));
221   PetscCall(KSPUnwindPreconditioner(ksp, VEC_TEMP, VEC_TEMP_MATOP));
222   /* add solution to previous solution */
223   if (vdest == vguess) {
224     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
225   } else {
226     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
227   }
228   PetscFunctionReturn(PETSC_SUCCESS);
229 }
230 
231 static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res)
232 {
233   PetscScalar *hh, *cc, *ss, *rs;
234   PetscInt     j;
235   PetscReal    hapbnd;
236   KSP_PGMRES  *pgmres = (KSP_PGMRES *)(ksp->data);
237 
238   PetscFunctionBegin;
239   hh = HH(0, it); /* pointer to beginning of column to update */
240   cc = CC(0);     /* beginning of cosine rotations */
241   ss = SS(0);     /* beginning of sine rotations */
242   rs = RS(0);     /* right hand side of least squares system */
243 
244   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
245   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
246 
247   /* check for the happy breakdown */
248   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pgmres->haptol);
249   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
250     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
251     *hapend = PETSC_TRUE;
252   }
253 
254   /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */
255   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
256      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
257 
258   for (j = 0; j < it; j++) {
259     PetscScalar hhj = hh[j];
260     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
261     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
262   }
263 
264   /*
265     compute the new plane rotation, and apply it to:
266      1) the right-hand-side of the Hessenberg system (RS)
267         note: it affects RS(it) and RS(it+1)
268      2) the new column of the Hessenberg matrix
269         note: it affects HH(it,it) which is currently pointed to
270         by hh and HH(it+1, it) (*(hh+1))
271     thus obtaining the updated value of the residual...
272   */
273 
274   /* compute new plane rotation */
275 
276   if (!*hapend) {
277     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
278     if (delta == 0.0) {
279       ksp->reason = KSP_DIVERGED_NULL;
280       PetscFunctionReturn(PETSC_SUCCESS);
281     }
282 
283     cc[it] = hh[it] / delta;     /* new cosine value */
284     ss[it] = hh[it + 1] / delta; /* new sine value */
285 
286     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
287     rs[it + 1] = -ss[it] * rs[it];
288     rs[it]     = PetscConj(cc[it]) * rs[it];
289     *res       = PetscAbsScalar(rs[it + 1]);
290   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
291             another rotation matrix (so RH doesn't change).  The new residual is
292             always the new sine term times the residual from last time (RS(it)),
293             but now the new sine rotation would be zero...so the residual should
294             be zero...so we will multiply "zero" by the last residual.  This might
295             not be exactly what we want to do here -could just return "zero". */
296     *res = 0.0;
297   }
298   PetscFunctionReturn(PETSC_SUCCESS);
299 }
300 
301 static PetscErrorCode KSPBuildSolution_PGMRES(KSP ksp, Vec ptr, Vec *result)
302 {
303   KSP_PGMRES *pgmres = (KSP_PGMRES *)ksp->data;
304 
305   PetscFunctionBegin;
306   if (!ptr) {
307     if (!pgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pgmres->sol_temp));
308     ptr = pgmres->sol_temp;
309   }
310   if (!pgmres->nrs) {
311     /* allocate the work area */
312     PetscCall(PetscMalloc1(pgmres->max_k, &pgmres->nrs));
313   }
314 
315   PetscCall(KSPPGMRESBuildSoln(pgmres->nrs, ksp->vec_sol, ptr, ksp, pgmres->it));
316   if (result) *result = ptr;
317   PetscFunctionReturn(PETSC_SUCCESS);
318 }
319 
320 static PetscErrorCode KSPSetFromOptions_PGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
321 {
322   PetscFunctionBegin;
323   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
324   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined GMRES Options");
325   PetscOptionsHeadEnd();
326   PetscFunctionReturn(PETSC_SUCCESS);
327 }
328 
329 static PetscErrorCode KSPReset_PGMRES(KSP ksp)
330 {
331   PetscFunctionBegin;
332   PetscCall(KSPReset_GMRES(ksp));
333   PetscFunctionReturn(PETSC_SUCCESS);
334 }
335 
336 /*MC
337      KSPPGMRES - Implements the Pipelined Generalized Minimal Residual method. [](sec_pipelineksp)
338 
339    Options Database Keys:
340 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
341 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
342 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of vectors are allocated as needed)
343 .   -ksp_gmres_classicalgramschmidt - use classical (unmodified) Gram-Schmidt to orthogonalize against the Krylov space (fast) (the default)
344 .   -ksp_gmres_modifiedgramschmidt - use modified Gram-Schmidt in the orthogonalization (more stable, but slower)
345 .   -ksp_gmres_cgs_refinement_type <refine_never,refine_ifneeded,refine_always> - determine if iterative refinement is used to increase the
346                                    stability of the classical Gram-Schmidt  orthogonalization.
347 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
348 
349    Level: beginner
350 
351    Note:
352    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
353    See [](doc_faq_pipelined)
354 
355    References:
356 .  * - Ghysels, Ashby, Meerbergen, Vanroose, Hiding global communication latencies in the GMRES algorithm on massively parallel machines, 2012.
357 
358    Developer Note:
359    This object is subclassed off of `KSPGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code
360 
361 .seealso: [](ch_ksp), [](sec_pipelineksp), [](doc_faq_pipelined), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPGMRES`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`,
362           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESSetOrthogonalization()`, `KSPGMRESGetOrthogonalization()`,
363           `KSPGMRESClassicalGramSchmidtOrthogonalization()`, `KSPGMRESModifiedGramSchmidtOrthogonalization()`,
364           `KSPGMRESCGSRefinementType`, `KSPGMRESSetCGSRefinementType()`, `KSPGMRESGetCGSRefinementType()`, `KSPGMRESMonitorKrylov()`
365 M*/
366 
367 PETSC_EXTERN PetscErrorCode KSPCreate_PGMRES(KSP ksp)
368 {
369   KSP_PGMRES *pgmres;
370 
371   PetscFunctionBegin;
372   PetscCall(PetscNew(&pgmres));
373 
374   ksp->data                              = (void *)pgmres;
375   ksp->ops->buildsolution                = KSPBuildSolution_PGMRES;
376   ksp->ops->setup                        = KSPSetUp_PGMRES;
377   ksp->ops->solve                        = KSPSolve_PGMRES;
378   ksp->ops->reset                        = KSPReset_PGMRES;
379   ksp->ops->destroy                      = KSPDestroy_PGMRES;
380   ksp->ops->view                         = KSPView_GMRES;
381   ksp->ops->setfromoptions               = KSPSetFromOptions_PGMRES;
382   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
383   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
384 
385   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_PRECONDITIONED, PC_LEFT, 3));
386   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 2));
387   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
388 
389   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
390   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetOrthogonalization_C", KSPGMRESSetOrthogonalization_GMRES));
391   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetOrthogonalization_C", KSPGMRESGetOrthogonalization_GMRES));
392   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
393   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
394   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetCGSRefinementType_C", KSPGMRESSetCGSRefinementType_GMRES));
395   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetCGSRefinementType_C", KSPGMRESGetCGSRefinementType_GMRES));
396 
397   pgmres->nextra_vecs    = 1;
398   pgmres->haptol         = 1.0e-30;
399   pgmres->q_preallocate  = 0;
400   pgmres->delta_allocate = PGMRES_DELTA_DIRECTIONS;
401   pgmres->orthog         = KSPGMRESClassicalGramSchmidtOrthogonalization;
402   pgmres->nrs            = NULL;
403   pgmres->sol_temp       = NULL;
404   pgmres->max_k          = PGMRES_DEFAULT_MAXK;
405   pgmres->Rsvd           = NULL;
406   pgmres->orthogwork     = NULL;
407   pgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
408   PetscFunctionReturn(PETSC_SUCCESS);
409 }
410