xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 7c441f3aff93c611491d4ea0564d57010b1fd4e9)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited      = PETSC_FALSE;
8 static const char citation[] = "@article{SSM2016,\n"
9                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
10                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
11                                "  journal = {SIAM Journal on Scientific Computing},\n"
12                                "  volume = {38},\n"
13                                "  number = {5},\n"
14                                "  pages = {C441-C470},\n"
15                                "  year = {2016},\n"
16                                "  doi = {10.1137/15M1049130},\n"
17                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
18                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
19                                "}\n";
20 
21 #define PIPEFGMRES_DELTA_DIRECTIONS 10
22 #define PIPEFGMRES_DEFAULT_MAXK     30
23 
24 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt);
25 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
26 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
27 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
28 
29 /*
30 
31     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
32 
33     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
34     but can be called directly by KSPSetUp().
35 
36 */
37 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
38 {
39   PetscInt        k;
40   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
41   const PetscInt  max_k      = pipefgmres->max_k;
42 
43   PetscFunctionBegin;
44   PetscCall(KSPSetUp_GMRES(ksp));
45 
46   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs));
47   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work));
48 
49   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL));
50   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
51 
52   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs));
53   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work));
54 
55   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux));
56 
57   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL));
58   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
59 
60   PetscFunctionReturn(0);
61 }
62 
63 /*
64 
65     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
66                   history if requested.
67 
68     input parameters:
69 .        pipefgmres  - structure containing parameters and work areas
70 
71     output parameters:
72 .        itcount - number of iterations used.  If null, ignored.
73 .        converged - 0 if not converged
74 
75     Notes:
76     On entry, the value in vector VEC_VV(0) should be
77     the initial residual.
78 
79 */
80 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp)
81 {
82   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
83   PetscReal       res_norm;
84   PetscReal       hapbnd, tt;
85   PetscScalar    *hh, *hes, *lhh, shift = pipefgmres->shift;
86   PetscBool       hapend = PETSC_FALSE;      /* indicates happy breakdown ending */
87   PetscInt        loc_it;                    /* local count of # of dir. in Krylov space */
88   PetscInt        max_k = pipefgmres->max_k; /* max # of directions Krylov space */
89   PetscInt        i, j, k;
90   Mat             Amat, Pmat;
91   Vec             Q, W;                      /* Pipelining vectors */
92   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
93 
94   PetscFunctionBegin;
95   if (itcount) *itcount = 0;
96 
97   /* Assign simpler names to these vectors, allocated as pipelining workspace */
98   Q = VEC_Q;
99   W = VEC_W;
100 
101   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
102   /* Note that we add an extra value here to allow for a single reduction */
103   if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork));
104   lhh = pipefgmres->orthogwork;
105 
106   /* Number of pseudo iterations since last restart is the number
107      of prestart directions */
108   loc_it = 0;
109 
110   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
111      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
112      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
113      (loc_it -1) is passed, so the two are equivalent */
114   pipefgmres->it = (loc_it - 1);
115 
116   /* initial residual is in VEC_VV(0)  - compute its norm*/
117   PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm));
118 
119   /* first entry in right-hand-side of hessenberg system is just
120      the initial residual norm */
121   *RS(0) = res_norm;
122 
123   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
124   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
125   else ksp->rnorm = 0;
126   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
127   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
128   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
129 
130   /* check for the convergence - maybe the current guess is good enough */
131   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
132   if (ksp->reason) {
133     if (itcount) *itcount = 0;
134     PetscFunctionReturn(0);
135   }
136 
137   /* scale VEC_VV (the initial residual) */
138   PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm));
139 
140   /* Fill the pipeline */
141   PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it)));
142   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
143   PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it)));
144   PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */
145 
146   /* MAIN ITERATION LOOP BEGINNING*/
147   /* keep iterating until we have converged OR generated the max number
148      of directions OR reached the max number of iterations for the method */
149   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
150     if (loc_it) {
151       PetscCall(KSPLogResidualHistory(ksp, res_norm));
152       PetscCall(KSPMonitor(ksp, ksp->its, res_norm));
153     }
154     pipefgmres->it = (loc_it - 1);
155 
156     /* see if more space is needed for work vectors */
157     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
158       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1));
159       /* (loc_it+1) is passed in as number of the first vector that should
160          be allocated */
161     }
162 
163     /* Note that these inner products are with "Z" now, so
164        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
165        not the value from the equivalent FGMRES run (even in exact arithmetic)
166        That is, the H we need for the Arnoldi relation is different from the
167        coefficients we use in the orthogonalization process,because of the shift */
168 
169     /* Do some local twiddling to allow for a single reduction */
170     for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i);
171     redux[loc_it + 1] = ZVEC(loc_it);
172 
173     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
174     PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh));
175 
176     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
177     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
178 
179     /* The work to be overlapped with the inner products follows.
180        This is application of the preconditioner and the operator
181        to compute intermediate quantites which will be combined (locally)
182        with the results of the inner products.
183        */
184     PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q));
185     PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
186     PetscCall(KSP_MatMult(ksp, Amat, Q, W));
187 
188     /* Compute inner products of the new direction with previous directions,
189        and the norm of the to-be-orthogonalized direction "Z".
190        This information is enough to build the required entries
191        of H. The inner product with VEC_VV(it_loc) is
192        *different* than in the standard FGMRES and need to be dealt with specially.
193        That is, for standard FGMRES the orthogonalization coefficients are the same
194        as the coefficients used in the Arnoldi relation to reconstruct, but here this
195        is not true (albeit only for the one entry of H which we "unshift" below. */
196 
197     /* Finish the dot product, retrieving the extra entry */
198     PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh));
199     tt = PetscRealPart(lhh[loc_it + 1]);
200 
201     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
202       Note that the Hessenberg entries require a shift, as these are for the
203       relation AU = VH, which is wrt unshifted basis vectors */
204     hh  = HH(0, loc_it);
205     hes = HES(0, loc_it);
206     for (j = 0; j < loc_it; j++) {
207       hh[j]  = lhh[j];
208       hes[j] = lhh[j];
209     }
210     hh[loc_it]  = lhh[loc_it] + shift;
211     hes[loc_it] = lhh[loc_it] + shift;
212 
213     /* we delay applying the shift here */
214     for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ }
215 
216     /* Compute the norm of the un-normalized new direction using the rearranged formula
217        Note that these are shifted ("barred") quantities */
218     for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
219     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
220     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0;
221     if (tt < 0.0) {
222       /* If we detect square root breakdown in the norm, we must restart the algorithm.
223          Here this means we simply break the current loop and reconstruct the solution
224          using the basis we have computed thus far. Note that by breaking immediately,
225          we do not update the iteration count, so computation done in this iteration
226          should be disregarded.
227          */
228       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt));
229       break;
230     } else {
231       tt = PetscSqrtReal(tt);
232     }
233 
234     /* new entry in hessenburg is the 2-norm of our new direction */
235     hh[loc_it + 1]  = tt;
236     hes[loc_it + 1] = tt;
237 
238     /* The recurred computation for the new direction
239        The division by tt is delayed to the happy breakdown check later
240        Note placement BEFORE the unshift
241        */
242     PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1)));
243     PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0)));
244     /* (VEC_VV(loc_it+1) is not normalized yet) */
245 
246     /* The recurred computation for the preconditioned vector (u) */
247     PetscCall(VecCopy(Q, PREVEC(loc_it + 1)));
248     PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0)));
249     PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt));
250 
251     /* Unshift an entry in the GS coefficients ("removing the bar") */
252     lhh[loc_it] -= shift;
253 
254     /* The recurred computation for z (Au)
255        Note placement AFTER the "unshift" */
256     PetscCall(VecCopy(W, ZVEC(loc_it + 1)));
257     PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0)));
258     PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt));
259 
260     /* Happy Breakdown Check */
261     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
262     /* RS(loc_it) contains the res_norm from the last iteration  */
263     hapbnd = PetscMin(pipefgmres->haptol, hapbnd);
264     if (tt > hapbnd) {
265       /* scale new direction by its norm  */
266       PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt));
267     } else {
268       /* This happens when the solution is exactly reached. */
269       /* So there is no new direction... */
270       PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */
271       hapend = PETSC_TRUE;
272     }
273     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
274        current solution would not be exact if HES was singular.  Note that
275        HH non-singular implies that HES is not singular, and HES is guaranteed
276        to be nonsingular when PREVECS are linearly independent and A is
277        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
278        of HES). So we should really add a check to verify that HES is nonsingular.*/
279 
280     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
281        here to check that the hessenberg matrix is indeed non-singular (since
282        FGMRES does not guarantee this) */
283 
284     /* Now apply rotations to new col of hessenberg (and right side of system),
285        calculate new rotation, and get new residual norm at the same time*/
286     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm));
287     if (ksp->reason) break;
288 
289     loc_it++;
290     pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */
291 
292     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
293     ksp->its++;
294     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
295     else ksp->rnorm = 0;
296     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
297 
298     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
299 
300     /* Catch error in happy breakdown and signal convergence and break from loop */
301     if (hapend) {
302       if (!ksp->reason) {
303         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm);
304         ksp->reason = KSP_DIVERGED_BREAKDOWN;
305         break;
306       }
307     }
308   }
309   /* END OF ITERATION LOOP */
310   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
311 
312   /*
313      Monitor if we know that we will not return for a restart */
314   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
315 
316   if (itcount) *itcount = loc_it;
317 
318   /*
319     Down here we have to solve for the "best" coefficients of the Krylov
320     columns, add the solution values together, and possibly unwind the
321     preconditioning from the solution
322    */
323 
324   /* Form the solution (or the solution so far) */
325   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
326      properly navigates */
327 
328   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1));
329 
330   PetscFunctionReturn(0);
331 }
332 
333 /*
334     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
335 
336    Input Parameter:
337 .     ksp - the Krylov space object that was set to use pipefgmres
338 
339    Output Parameter:
340 .     outits - number of iterations used
341 
342 */
343 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
344 {
345   PetscInt        its, itcount;
346   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
347   PetscBool       guess_zero = ksp->guess_zero;
348 
349   PetscFunctionBegin;
350   /* We have not checked these routines for use with complex numbers. The inner products
351      are likely not defined correctly for that case */
352   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
353 
354   PetscCall(PetscCitationsRegister(citation, &cited));
355 
356   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
357   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
358   ksp->its = 0;
359   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
360 
361   itcount     = 0;
362   ksp->reason = KSP_CONVERGED_ITERATING;
363   while (!ksp->reason) {
364     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
365     PetscCall(KSPPIPEFGMRESCycle(&its, ksp));
366     itcount += its;
367     if (itcount >= ksp->max_it) {
368       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
369       break;
370     }
371     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
372   }
373   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
374   PetscFunctionReturn(0);
375 }
376 
377 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
378 {
379   PetscFunctionBegin;
380   PetscCall(KSPReset_PIPEFGMRES(ksp));
381   PetscCall(KSPDestroy_GMRES(ksp));
382   PetscFunctionReturn(0);
383 }
384 
385 /*
386     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
387                       current iterates.
388 
389     Input parameters:
390         nrs - work area of size it + 1.
391         vguess  - index of initial guess
392         vdest - index of result.  Note that vguess may == vdest (replace
393                 guess with the solution).
394         it - HH upper triangular part is a block of size (it+1) x (it+1)
395 
396      This is an internal routine that knows about the PIPEFGMRES internals.
397  */
398 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it)
399 {
400   PetscScalar     tt;
401   PetscInt        k, j;
402   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
403 
404   PetscFunctionBegin;
405   /* Solve for solution vector that minimizes the residual */
406 
407   if (it < 0) {                        /* no pipefgmres steps have been performed */
408     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
409     PetscFunctionReturn(0);
410   }
411 
412   /* solve the upper triangular system - RS is the right side and HH is
413      the upper triangular matrix  - put soln in nrs */
414   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
415   else nrs[it] = 0.0;
416 
417   for (k = it - 1; k >= 0; k--) {
418     tt = *RS(k);
419     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
420     nrs[k] = tt / *HH(k, k);
421   }
422 
423   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
424   PetscCall(VecZeroEntries(VEC_TEMP));
425   PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &PREVEC(0)));
426 
427   /* add solution to previous solution */
428   if (vdest == vguess) {
429     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
430   } else {
431     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
432   }
433   PetscFunctionReturn(0);
434 }
435 
436 /*
437 
438     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
439                             Return new residual.
440 
441     input parameters:
442 
443 .        ksp -    Krylov space object
444 .        it  -    plane rotations are applied to the (it+1)th column of the
445                   modified hessenberg (i.e. HH(:,it))
446 .        hapend - PETSC_FALSE not happy breakdown ending.
447 
448     output parameters:
449 .        res - the new residual
450 
451  */
452 /*
453 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
454  */
455 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res)
456 {
457   PetscScalar    *hh, *cc, *ss, *rs;
458   PetscInt        j;
459   PetscReal       hapbnd;
460   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
461 
462   PetscFunctionBegin;
463   hh = HH(0, it); /* pointer to beginning of column to update */
464   cc = CC(0);     /* beginning of cosine rotations */
465   ss = SS(0);     /* beginning of sine rotations */
466   rs = RS(0);     /* right hand side of least squares system */
467 
468   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
469   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
470 
471   /* check for the happy breakdown */
472   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol);
473   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
474     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
475     *hapend = PETSC_TRUE;
476   }
477 
478   /* Apply all the previously computed plane rotations to the new column
479      of the Hessenberg matrix */
480   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
481      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
482 
483   for (j = 0; j < it; j++) {
484     PetscScalar hhj = hh[j];
485     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
486     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
487   }
488 
489   /*
490     compute the new plane rotation, and apply it to:
491      1) the right-hand-side of the Hessenberg system (RS)
492         note: it affects RS(it) and RS(it+1)
493      2) the new column of the Hessenberg matrix
494         note: it affects HH(it,it) which is currently pointed to
495         by hh and HH(it+1, it) (*(hh+1))
496     thus obtaining the updated value of the residual...
497   */
498 
499   /* compute new plane rotation */
500 
501   if (!*hapend) {
502     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
503     if (delta == 0.0) {
504       ksp->reason = KSP_DIVERGED_NULL;
505       PetscFunctionReturn(0);
506     }
507 
508     cc[it] = hh[it] / delta;     /* new cosine value */
509     ss[it] = hh[it + 1] / delta; /* new sine value */
510 
511     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
512     rs[it + 1] = -ss[it] * rs[it];
513     rs[it]     = PetscConj(cc[it]) * rs[it];
514     *res       = PetscAbsScalar(rs[it + 1]);
515   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
516             another rotation matrix (so RH doesn't change).  The new residual is
517             always the new sine term times the residual from last time (RS(it)),
518             but now the new sine rotation would be zero...so the residual should
519             be zero...so we will multiply "zero" by the last residual.  This might
520             not be exactly what we want to do here -could just return "zero". */
521 
522     *res = 0.0;
523   }
524   PetscFunctionReturn(0);
525 }
526 
527 /*
528    KSPBuildSolution_PIPEFGMRES
529 
530      Input Parameter:
531 .     ksp - the Krylov space object
532 .     ptr-
533 
534    Output Parameter:
535 .     result - the solution
536 
537    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
538    calls directly.
539 
540 */
541 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result)
542 {
543   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
544 
545   PetscFunctionBegin;
546   if (!ptr) {
547     if (!pipefgmres->sol_temp) { PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp)); }
548     ptr = pipefgmres->sol_temp;
549   }
550   if (!pipefgmres->nrs) {
551     /* allocate the work area */
552     PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs));
553   }
554 
555   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it));
556   if (result) *result = ptr;
557   PetscFunctionReturn(0);
558 }
559 
560 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
561 {
562   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
563   PetscBool       flg;
564   PetscScalar     shift;
565 
566   PetscFunctionBegin;
567   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
568   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options");
569   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg));
570   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift));
571   PetscOptionsHeadEnd();
572   PetscFunctionReturn(0);
573 }
574 
575 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer)
576 {
577   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
578   PetscBool       iascii, isstring;
579 
580   PetscFunctionBegin;
581   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
582   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
583 
584   if (iascii) {
585     PetscCall(PetscViewerASCIIPrintf(viewer, "  restart=%" PetscInt_FMT "\n", pipefgmres->max_k));
586     PetscCall(PetscViewerASCIIPrintf(viewer, "  happy breakdown tolerance %g\n", (double)pipefgmres->haptol));
587 #if defined(PETSC_USE_COMPLEX)
588     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
589 #else
590     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g\n", (double)pipefgmres->shift));
591 #endif
592   } else if (isstring) {
593     PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k));
594 #if defined(PETSC_USE_COMPLEX)
595     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
596 #else
597     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g\n", (double)pipefgmres->shift));
598 #endif
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
604 {
605   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
606   PetscInt        i;
607 
608   PetscFunctionBegin;
609   PetscCall(PetscFree(pipefgmres->prevecs));
610   PetscCall(PetscFree(pipefgmres->zvecs));
611   for (i = 0; i < pipefgmres->nwork_alloc; i++) {
612     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i]));
613     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i]));
614   }
615   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
616   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
617   PetscCall(PetscFree(pipefgmres->redux));
618   PetscCall(KSPReset_GMRES(ksp));
619   PetscFunctionReturn(0);
620 }
621 
622 /*MC
623    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
624 
625    A flexible, 1-stage pipelined variant of GMRES.
626 
627    Options Database Keys:
628 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
629 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
630 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
631 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
632                              vectors are allocated as needed)
633 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
634 
635    Level: intermediate
636 
637    Notes:
638 
639    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
640 
641    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
642 
643    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
644    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
645    See the FAQ on the PETSc website for details.
646 
647    Developer Notes:
648     This class is subclassed off of KSPGMRES.
649 
650    Reference:
651     P. Sanan, S.M. Schnepp, and D.A. May,
652     "Pipelined, Flexible Krylov Subspace Methods,"
653     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
654     DOI: 10.1137/15M1049130
655 
656 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
657           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
658 M*/
659 
660 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
661 {
662   KSP_PIPEFGMRES *pipefgmres;
663 
664   PetscFunctionBegin;
665   PetscCall(PetscNew(&pipefgmres));
666 
667   ksp->data                              = (void *)pipefgmres;
668   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
669   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
670   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
671   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
672   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
673   ksp->ops->view                         = KSPView_PIPEFGMRES;
674   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
675   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
676   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
677 
678   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3));
679   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
680 
681   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
682   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
683   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
684 
685   pipefgmres->nextra_vecs    = 1;
686   pipefgmres->haptol         = 1.0e-30;
687   pipefgmres->q_preallocate  = 0;
688   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
689   pipefgmres->orthog         = NULL;
690   pipefgmres->nrs            = NULL;
691   pipefgmres->sol_temp       = NULL;
692   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
693   pipefgmres->Rsvd           = NULL;
694   pipefgmres->orthogwork     = NULL;
695   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
696   pipefgmres->shift          = 1.0;
697   PetscFunctionReturn(0);
698 }
699 
700 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it)
701 {
702   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
703   PetscInt        nwork      = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
704   PetscInt        nalloc;                               /* number to allocate */
705   PetscInt        k;
706 
707   PetscFunctionBegin;
708   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
709                                       in a single chunk */
710 
711   /* Adjust the number to allocate to make sure that we don't exceed the
712      number of available slots (pipefgmres->vecs_allocated)*/
713   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
714   if (!nalloc) PetscFunctionReturn(0);
715 
716   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
717 
718   /* work vectors */
719   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL));
720   for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k];
721   /* specify size of chunk allocated */
722   pipefgmres->mwork_alloc[nwork] = nalloc;
723 
724   /* preconditioned vectors (note we don't use VEC_OFFSET) */
725   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL));
726   for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k];
727 
728   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL));
729   for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k];
730 
731   /* increment the number of work vector chunks */
732   pipefgmres->nwork_alloc++;
733   PetscFunctionReturn(0);
734 }
735 
736 /*@
737   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
738 
739   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
740 
741 Logically Collective on ksp
742 
743 Input Parameters:
744 +  ksp - the Krylov space context
745 -  shift - the shift
746 
747 Level: intermediate
748 
749 Options Database:
750 . -ksp_pipefgmres_shift <shift> - set the shift parameter
751 
752 .seealso: `KSPComputeEigenvalues()`
753 @*/
754 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift)
755 {
756   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
757 
758   PetscFunctionBegin;
759   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
760   PetscValidLogicalCollectiveScalar(ksp, shift, 2);
761   pipefgmres->shift = shift;
762   PetscFunctionReturn(0);
763 }
764