xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 4977266fffaaedcd41e5a67923646569c4e329aa)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited      = PETSC_FALSE;
8 static const char citation[] = "@article{SSM2016,\n"
9                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
10                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
11                                "  journal = {SIAM Journal on Scientific Computing},\n"
12                                "  volume = {38},\n"
13                                "  number = {5},\n"
14                                "  pages = {C441-C470},\n"
15                                "  year = {2016},\n"
16                                "  doi = {10.1137/15M1049130},\n"
17                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
18                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
19                                "}\n";
20 
21 #define PIPEFGMRES_DELTA_DIRECTIONS 10
22 #define PIPEFGMRES_DEFAULT_MAXK     30
23 
24 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt);
25 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
26 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
27 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
28 
29 /*
30 
31     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
32 
33     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
34     but can be called directly by KSPSetUp().
35 
36 */
37 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) {
38   PetscInt        k;
39   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
40   const PetscInt  max_k      = pipefgmres->max_k;
41 
42   PetscFunctionBegin;
43   PetscCall(KSPSetUp_GMRES(ksp));
44 
45   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs));
46   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work));
47   PetscCall(PetscLogObjectMemory((PetscObject)ksp, (VEC_OFFSET + max_k) * (2 * sizeof(void *))));
48 
49   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL));
50   PetscCall(PetscLogObjectParents(ksp, pipefgmres->vv_allocated, pipefgmres->prevecs_user_work[0]));
51   for (k = 0; k < pipefgmres->vv_allocated; k++) { pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k]; }
52 
53   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs));
54   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work));
55   PetscCall(PetscLogObjectMemory((PetscObject)ksp, (VEC_OFFSET + max_k) * (2 * sizeof(void *))));
56 
57   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux));
58   PetscCall(PetscLogObjectMemory((PetscObject)ksp, (VEC_OFFSET + max_k) * (sizeof(void *))));
59 
60   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL));
61   PetscCall(PetscLogObjectParents(ksp, pipefgmres->vv_allocated, pipefgmres->zvecs_user_work[0]));
62   for (k = 0; k < pipefgmres->vv_allocated; k++) { pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k]; }
63 
64   PetscFunctionReturn(0);
65 }
66 
67 /*
68 
69     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
70                   history if requested.
71 
72     input parameters:
73 .        pipefgmres  - structure containing parameters and work areas
74 
75     output parameters:
76 .        itcount - number of iterations used.  If null, ignored.
77 .        converged - 0 if not converged
78 
79     Notes:
80     On entry, the value in vector VEC_VV(0) should be
81     the initial residual.
82 
83 */
84 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp) {
85   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
86   PetscReal       res_norm;
87   PetscReal       hapbnd, tt;
88   PetscScalar    *hh, *hes, *lhh, shift = pipefgmres->shift;
89   PetscBool       hapend = PETSC_FALSE;      /* indicates happy breakdown ending */
90   PetscInt        loc_it;                    /* local count of # of dir. in Krylov space */
91   PetscInt        max_k = pipefgmres->max_k; /* max # of directions Krylov space */
92   PetscInt        i, j, k;
93   Mat             Amat, Pmat;
94   Vec             Q, W;                      /* Pipelining vectors */
95   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
96 
97   PetscFunctionBegin;
98   if (itcount) *itcount = 0;
99 
100   /* Assign simpler names to these vectors, allocated as pipelining workspace */
101   Q = VEC_Q;
102   W = VEC_W;
103 
104   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
105   /* Note that we add an extra value here to allow for a single reduction */
106   if (!pipefgmres->orthogwork) { PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork)); }
107   lhh = pipefgmres->orthogwork;
108 
109   /* Number of pseudo iterations since last restart is the number
110      of prestart directions */
111   loc_it = 0;
112 
113   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
114      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
115      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
116      (loc_it -1) is passed, so the two are equivalent */
117   pipefgmres->it = (loc_it - 1);
118 
119   /* initial residual is in VEC_VV(0)  - compute its norm*/
120   PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm));
121 
122   /* first entry in right-hand-side of hessenberg system is just
123      the initial residual norm */
124   *RS(0) = res_norm;
125 
126   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
127   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
128   else ksp->rnorm = 0;
129   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
130   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
131   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
132 
133   /* check for the convergence - maybe the current guess is good enough */
134   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
135   if (ksp->reason) {
136     if (itcount) *itcount = 0;
137     PetscFunctionReturn(0);
138   }
139 
140   /* scale VEC_VV (the initial residual) */
141   PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm));
142 
143   /* Fill the pipeline */
144   PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it)));
145   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
146   PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it)));
147   PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */
148 
149   /* MAIN ITERATION LOOP BEGINNING*/
150   /* keep iterating until we have converged OR generated the max number
151      of directions OR reached the max number of iterations for the method */
152   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
153     if (loc_it) {
154       PetscCall(KSPLogResidualHistory(ksp, res_norm));
155       PetscCall(KSPMonitor(ksp, ksp->its, res_norm));
156     }
157     pipefgmres->it = (loc_it - 1);
158 
159     /* see if more space is needed for work vectors */
160     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
161       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1));
162       /* (loc_it+1) is passed in as number of the first vector that should
163          be allocated */
164     }
165 
166     /* Note that these inner products are with "Z" now, so
167        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
168        not the value from the equivalent FGMRES run (even in exact arithmetic)
169        That is, the H we need for the Arnoldi relation is different from the
170        coefficients we use in the orthogonalization process,because of the shift */
171 
172     /* Do some local twiddling to allow for a single reduction */
173     for (i = 0; i < loc_it + 1; i++) { redux[i] = VEC_VV(i); }
174     redux[loc_it + 1] = ZVEC(loc_it);
175 
176     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
177     PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh));
178 
179     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
180     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
181 
182     /* The work to be overlapped with the inner products follows.
183        This is application of the preconditioner and the operator
184        to compute intermediate quantites which will be combined (locally)
185        with the results of the inner products.
186        */
187     PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q));
188     PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
189     PetscCall(KSP_MatMult(ksp, Amat, Q, W));
190 
191     /* Compute inner products of the new direction with previous directions,
192        and the norm of the to-be-orthogonalized direction "Z".
193        This information is enough to build the required entries
194        of H. The inner product with VEC_VV(it_loc) is
195        *different* than in the standard FGMRES and need to be dealt with specially.
196        That is, for standard FGMRES the orthogonalization coefficients are the same
197        as the coefficients used in the Arnoldi relation to reconstruct, but here this
198        is not true (albeit only for the one entry of H which we "unshift" below. */
199 
200     /* Finish the dot product, retrieving the extra entry */
201     PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh));
202     tt = PetscRealPart(lhh[loc_it + 1]);
203 
204     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
205       Note that the Hessenberg entries require a shift, as these are for the
206       relation AU = VH, which is wrt unshifted basis vectors */
207     hh  = HH(0, loc_it);
208     hes = HES(0, loc_it);
209     for (j = 0; j < loc_it; j++) {
210       hh[j]  = lhh[j];
211       hes[j] = lhh[j];
212     }
213     hh[loc_it]  = lhh[loc_it] + shift;
214     hes[loc_it] = lhh[loc_it] + shift;
215 
216     /* we delay applying the shift here */
217     for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ }
218 
219     /* Compute the norm of the un-normalized new direction using the rearranged formula
220        Note that these are shifted ("barred") quantities */
221     for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
222     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
223     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0;
224     if (tt < 0.0) {
225       /* If we detect square root breakdown in the norm, we must restart the algorithm.
226          Here this means we simply break the current loop and reconstruct the solution
227          using the basis we have computed thus far. Note that by breaking immediately,
228          we do not update the iteration count, so computation done in this iteration
229          should be disregarded.
230          */
231       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt));
232       break;
233     } else {
234       tt = PetscSqrtReal(tt);
235     }
236 
237     /* new entry in hessenburg is the 2-norm of our new direction */
238     hh[loc_it + 1]  = tt;
239     hes[loc_it + 1] = tt;
240 
241     /* The recurred computation for the new direction
242        The division by tt is delayed to the happy breakdown check later
243        Note placement BEFORE the unshift
244        */
245     PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1)));
246     PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0)));
247     /* (VEC_VV(loc_it+1) is not normalized yet) */
248 
249     /* The recurred computation for the preconditioned vector (u) */
250     PetscCall(VecCopy(Q, PREVEC(loc_it + 1)));
251     PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0)));
252     PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt));
253 
254     /* Unshift an entry in the GS coefficients ("removing the bar") */
255     lhh[loc_it] -= shift;
256 
257     /* The recurred computation for z (Au)
258        Note placement AFTER the "unshift" */
259     PetscCall(VecCopy(W, ZVEC(loc_it + 1)));
260     PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0)));
261     PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt));
262 
263     /* Happy Breakdown Check */
264     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
265     /* RS(loc_it) contains the res_norm from the last iteration  */
266     hapbnd = PetscMin(pipefgmres->haptol, hapbnd);
267     if (tt > hapbnd) {
268       /* scale new direction by its norm  */
269       PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt));
270     } else {
271       /* This happens when the solution is exactly reached. */
272       /* So there is no new direction... */
273       PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */
274       hapend = PETSC_TRUE;
275     }
276     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
277        current solution would not be exact if HES was singular.  Note that
278        HH non-singular implies that HES is not singular, and HES is guaranteed
279        to be nonsingular when PREVECS are linearly independent and A is
280        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
281        of HES). So we should really add a check to verify that HES is nonsingular.*/
282 
283     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
284        here to check that the hessenberg matrix is indeed non-singular (since
285        FGMRES does not guarantee this) */
286 
287     /* Now apply rotations to new col of hessenberg (and right side of system),
288        calculate new rotation, and get new residual norm at the same time*/
289     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm));
290     if (ksp->reason) break;
291 
292     loc_it++;
293     pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */
294 
295     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
296     ksp->its++;
297     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
298     else ksp->rnorm = 0;
299     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
300 
301     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
302 
303     /* Catch error in happy breakdown and signal convergence and break from loop */
304     if (hapend) {
305       if (!ksp->reason) {
306         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm);
307         ksp->reason = KSP_DIVERGED_BREAKDOWN;
308         break;
309       }
310     }
311   }
312   /* END OF ITERATION LOOP */
313   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
314 
315   /*
316      Monitor if we know that we will not return for a restart */
317   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm)); }
318 
319   if (itcount) *itcount = loc_it;
320 
321   /*
322     Down here we have to solve for the "best" coefficients of the Krylov
323     columns, add the solution values together, and possibly unwind the
324     preconditioning from the solution
325    */
326 
327   /* Form the solution (or the solution so far) */
328   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
329      properly navigates */
330 
331   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1));
332 
333   PetscFunctionReturn(0);
334 }
335 
336 /*
337     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
338 
339    Input Parameter:
340 .     ksp - the Krylov space object that was set to use pipefgmres
341 
342    Output Parameter:
343 .     outits - number of iterations used
344 
345 */
346 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) {
347   PetscInt        its, itcount;
348   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
349   PetscBool       guess_zero = ksp->guess_zero;
350 
351   PetscFunctionBegin;
352   /* We have not checked these routines for use with complex numbers. The inner products
353      are likely not defined correctly for that case */
354   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
355 
356   PetscCall(PetscCitationsRegister(citation, &cited));
357 
358   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
359   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
360   ksp->its = 0;
361   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
362 
363   itcount     = 0;
364   ksp->reason = KSP_CONVERGED_ITERATING;
365   while (!ksp->reason) {
366     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
367     PetscCall(KSPPIPEFGMRESCycle(&its, ksp));
368     itcount += its;
369     if (itcount >= ksp->max_it) {
370       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
371       break;
372     }
373     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
374   }
375   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
376   PetscFunctionReturn(0);
377 }
378 
379 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) {
380   PetscFunctionBegin;
381   PetscCall(KSPReset_PIPEFGMRES(ksp));
382   PetscCall(KSPDestroy_GMRES(ksp));
383   PetscFunctionReturn(0);
384 }
385 
386 /*
387     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
388                       current iterates.
389 
390     Input parameters:
391         nrs - work area of size it + 1.
392         vguess  - index of initial guess
393         vdest - index of result.  Note that vguess may == vdest (replace
394                 guess with the solution).
395         it - HH upper triangular part is a block of size (it+1) x (it+1)
396 
397      This is an internal routine that knows about the PIPEFGMRES internals.
398  */
399 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) {
400   PetscScalar     tt;
401   PetscInt        k, j;
402   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
403 
404   PetscFunctionBegin;
405   /* Solve for solution vector that minimizes the residual */
406 
407   if (it < 0) {                        /* no pipefgmres steps have been performed */
408     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
409     PetscFunctionReturn(0);
410   }
411 
412   /* solve the upper triangular system - RS is the right side and HH is
413      the upper triangular matrix  - put soln in nrs */
414   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
415   else nrs[it] = 0.0;
416 
417   for (k = it - 1; k >= 0; k--) {
418     tt = *RS(k);
419     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
420     nrs[k] = tt / *HH(k, k);
421   }
422 
423   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
424   PetscCall(VecZeroEntries(VEC_TEMP));
425   PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &PREVEC(0)));
426 
427   /* add solution to previous solution */
428   if (vdest == vguess) {
429     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
430   } else {
431     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
432   }
433   PetscFunctionReturn(0);
434 }
435 
436 /*
437 
438     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
439                             Return new residual.
440 
441     input parameters:
442 
443 .        ksp -    Krylov space object
444 .        it  -    plane rotations are applied to the (it+1)th column of the
445                   modified hessenberg (i.e. HH(:,it))
446 .        hapend - PETSC_FALSE not happy breakdown ending.
447 
448     output parameters:
449 .        res - the new residual
450 
451  */
452 /*
453 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
454  */
455 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) {
456   PetscScalar    *hh, *cc, *ss, *rs;
457   PetscInt        j;
458   PetscReal       hapbnd;
459   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
460 
461   PetscFunctionBegin;
462   hh = HH(0, it); /* pointer to beginning of column to update */
463   cc = CC(0);     /* beginning of cosine rotations */
464   ss = SS(0);     /* beginning of sine rotations */
465   rs = RS(0);     /* right hand side of least squares system */
466 
467   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
468   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
469 
470   /* check for the happy breakdown */
471   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol);
472   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
473     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
474     *hapend = PETSC_TRUE;
475   }
476 
477   /* Apply all the previously computed plane rotations to the new column
478      of the Hessenberg matrix */
479   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
480      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
481 
482   for (j = 0; j < it; j++) {
483     PetscScalar hhj = hh[j];
484     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
485     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
486   }
487 
488   /*
489     compute the new plane rotation, and apply it to:
490      1) the right-hand-side of the Hessenberg system (RS)
491         note: it affects RS(it) and RS(it+1)
492      2) the new column of the Hessenberg matrix
493         note: it affects HH(it,it) which is currently pointed to
494         by hh and HH(it+1, it) (*(hh+1))
495     thus obtaining the updated value of the residual...
496   */
497 
498   /* compute new plane rotation */
499 
500   if (!*hapend) {
501     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
502     if (delta == 0.0) {
503       ksp->reason = KSP_DIVERGED_NULL;
504       PetscFunctionReturn(0);
505     }
506 
507     cc[it] = hh[it] / delta;     /* new cosine value */
508     ss[it] = hh[it + 1] / delta; /* new sine value */
509 
510     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
511     rs[it + 1] = -ss[it] * rs[it];
512     rs[it]     = PetscConj(cc[it]) * rs[it];
513     *res       = PetscAbsScalar(rs[it + 1]);
514   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
515             another rotation matrix (so RH doesn't change).  The new residual is
516             always the new sine term times the residual from last time (RS(it)),
517             but now the new sine rotation would be zero...so the residual should
518             be zero...so we will multiply "zero" by the last residual.  This might
519             not be exactly what we want to do here -could just return "zero". */
520 
521     *res = 0.0;
522   }
523   PetscFunctionReturn(0);
524 }
525 
526 /*
527    KSPBuildSolution_PIPEFGMRES
528 
529      Input Parameter:
530 .     ksp - the Krylov space object
531 .     ptr-
532 
533    Output Parameter:
534 .     result - the solution
535 
536    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
537    calls directly.
538 
539 */
540 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result) {
541   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
542 
543   PetscFunctionBegin;
544   if (!ptr) {
545     if (!pipefgmres->sol_temp) {
546       PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp));
547       PetscCall(PetscLogObjectParent((PetscObject)ksp, (PetscObject)pipefgmres->sol_temp));
548     }
549     ptr = pipefgmres->sol_temp;
550   }
551   if (!pipefgmres->nrs) {
552     /* allocate the work area */
553     PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs));
554     PetscCall(PetscLogObjectMemory((PetscObject)ksp, pipefgmres->max_k * sizeof(PetscScalar)));
555   }
556 
557   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it));
558   if (result) *result = ptr;
559   PetscFunctionReturn(0);
560 }
561 
562 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) {
563   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
564   PetscBool       flg;
565   PetscScalar     shift;
566 
567   PetscFunctionBegin;
568   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
569   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options");
570   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg));
571   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift));
572   PetscOptionsHeadEnd();
573   PetscFunctionReturn(0);
574 }
575 
576 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer) {
577   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
578   PetscBool       iascii, isstring;
579 
580   PetscFunctionBegin;
581   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
582   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
583 
584   if (iascii) {
585     PetscCall(PetscViewerASCIIPrintf(viewer, "  restart=%" PetscInt_FMT "\n", pipefgmres->max_k));
586     PetscCall(PetscViewerASCIIPrintf(viewer, "  happy breakdown tolerance %g\n", (double)pipefgmres->haptol));
587 #if defined(PETSC_USE_COMPLEX)
588     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
589 #else
590     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g\n", (double)pipefgmres->shift));
591 #endif
592   } else if (isstring) {
593     PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k));
594 #if defined(PETSC_USE_COMPLEX)
595     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
596 #else
597     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g\n", (double)pipefgmres->shift));
598 #endif
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) {
604   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
605   PetscInt        i;
606 
607   PetscFunctionBegin;
608   PetscCall(PetscFree(pipefgmres->prevecs));
609   PetscCall(PetscFree(pipefgmres->zvecs));
610   for (i = 0; i < pipefgmres->nwork_alloc; i++) {
611     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i]));
612     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i]));
613   }
614   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
615   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
616   PetscCall(PetscFree(pipefgmres->redux));
617   PetscCall(KSPReset_GMRES(ksp));
618   PetscFunctionReturn(0);
619 }
620 
621 /*MC
622    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
623 
624    A flexible, 1-stage pipelined variant of GMRES.
625 
626    Options Database Keys:
627 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
628 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
629 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
630 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
631                              vectors are allocated as needed)
632 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
633 
634    Level: intermediate
635 
636    Notes:
637 
638    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
639 
640    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
641 
642    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
643    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
644    See the FAQ on the PETSc website for details.
645 
646    Developer Notes:
647     This class is subclassed off of KSPGMRES.
648 
649    Reference:
650     P. Sanan, S.M. Schnepp, and D.A. May,
651     "Pipelined, Flexible Krylov Subspace Methods,"
652     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
653     DOI: 10.1137/15M1049130
654 
655 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
656           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
657 M*/
658 
659 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) {
660   KSP_PIPEFGMRES *pipefgmres;
661 
662   PetscFunctionBegin;
663   PetscCall(PetscNewLog(ksp, &pipefgmres));
664 
665   ksp->data                              = (void *)pipefgmres;
666   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
667   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
668   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
669   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
670   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
671   ksp->ops->view                         = KSPView_PIPEFGMRES;
672   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
673   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
674   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
675 
676   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3));
677   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
678 
679   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
680   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
681   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
682 
683   pipefgmres->nextra_vecs    = 1;
684   pipefgmres->haptol         = 1.0e-30;
685   pipefgmres->q_preallocate  = 0;
686   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
687   pipefgmres->orthog         = NULL;
688   pipefgmres->nrs            = NULL;
689   pipefgmres->sol_temp       = NULL;
690   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
691   pipefgmres->Rsvd           = NULL;
692   pipefgmres->orthogwork     = NULL;
693   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
694   pipefgmres->shift          = 1.0;
695   PetscFunctionReturn(0);
696 }
697 
698 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it) {
699   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
700   PetscInt        nwork      = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
701   PetscInt        nalloc;                               /* number to allocate */
702   PetscInt        k;
703 
704   PetscFunctionBegin;
705   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
706                                       in a single chunk */
707 
708   /* Adjust the number to allocate to make sure that we don't exceed the
709      number of available slots (pipefgmres->vecs_allocated)*/
710   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) { nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET; }
711   if (!nalloc) PetscFunctionReturn(0);
712 
713   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
714 
715   /* work vectors */
716   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL));
717   PetscCall(PetscLogObjectParents(ksp, nalloc, pipefgmres->user_work[nwork]));
718   for (k = 0; k < nalloc; k++) { pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k]; }
719   /* specify size of chunk allocated */
720   pipefgmres->mwork_alloc[nwork] = nalloc;
721 
722   /* preconditioned vectors (note we don't use VEC_OFFSET) */
723   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL));
724   PetscCall(PetscLogObjectParents(ksp, nalloc, pipefgmres->prevecs_user_work[nwork]));
725   for (k = 0; k < nalloc; k++) { pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k]; }
726 
727   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL));
728   PetscCall(PetscLogObjectParents(ksp, nalloc, pipefgmres->zvecs_user_work[nwork]));
729   for (k = 0; k < nalloc; k++) { pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k]; }
730 
731   /* increment the number of work vector chunks */
732   pipefgmres->nwork_alloc++;
733   PetscFunctionReturn(0);
734 }
735 
736 /*@
737   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
738 
739   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
740 
741 Logically Collective on ksp
742 
743 Input Parameters:
744 +  ksp - the Krylov space context
745 -  shift - the shift
746 
747 Level: intermediate
748 
749 Options Database:
750 . -ksp_pipefgmres_shift <shift> - set the shift parameter
751 
752 .seealso: `KSPComputeEigenvalues()`
753 @*/
754 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift) {
755   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
756 
757   PetscFunctionBegin;
758   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
759   PetscValidLogicalCollectiveScalar(ksp, shift, 2);
760   pipefgmres->shift = shift;
761   PetscFunctionReturn(0);
762 }
763