xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 30bab8dbce35fbb76248bd0fea1b2bc98f8a4f04)
1 
2 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I  "petscksp.h"  I*/
3 
4 static PetscBool  cited      = PETSC_FALSE;
5 static const char citation[] = "@article{SSM2016,\n"
6                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
7                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
8                                "  journal = {SIAM Journal on Scientific Computing},\n"
9                                "  volume = {38},\n"
10                                "  number = {5},\n"
11                                "  pages = {C441-C470},\n"
12                                "  year = {2016},\n"
13                                "  doi = {10.1137/15M1049130},\n"
14                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
15                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
16                                "}\n";
17 
18 #define PIPEFGMRES_DELTA_DIRECTIONS 10
19 #define PIPEFGMRES_DEFAULT_MAXK     30
20 
21 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt);
22 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
23 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
24 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
25 
26 /*
27 
28     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
29 
30     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
31     but can be called directly by KSPSetUp().
32 
33 */
34 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
35 {
36   PetscInt        k;
37   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
38   const PetscInt  max_k      = pipefgmres->max_k;
39 
40   PetscFunctionBegin;
41   PetscCall(KSPSetUp_GMRES(ksp));
42 
43   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs));
44   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work));
45 
46   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL));
47   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
48 
49   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs));
50   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work));
51 
52   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux));
53 
54   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL));
55   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
56 
57   PetscFunctionReturn(PETSC_SUCCESS);
58 }
59 
60 /*
61 
62     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
63                   history if requested.
64 
65     input parameters:
66 .        pipefgmres  - structure containing parameters and work areas
67 
68     output parameters:
69 .        itcount - number of iterations used.  If null, ignored.
70 .        converged - 0 if not converged
71 
72     Notes:
73     On entry, the value in vector VEC_VV(0) should be
74     the initial residual.
75 
76 */
77 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp)
78 {
79   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
80   PetscReal       res_norm;
81   PetscReal       hapbnd, tt;
82   PetscScalar    *hh, *hes, *lhh, shift = pipefgmres->shift;
83   PetscBool       hapend = PETSC_FALSE;      /* indicates happy breakdown ending */
84   PetscInt        loc_it;                    /* local count of # of dir. in Krylov space */
85   PetscInt        max_k = pipefgmres->max_k; /* max # of directions Krylov space */
86   PetscInt        i, j, k;
87   Mat             Amat, Pmat;
88   Vec             Q, W;                      /* Pipelining vectors */
89   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
90 
91   PetscFunctionBegin;
92   if (itcount) *itcount = 0;
93 
94   /* Assign simpler names to these vectors, allocated as pipelining workspace */
95   Q = VEC_Q;
96   W = VEC_W;
97 
98   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
99   /* Note that we add an extra value here to allow for a single reduction */
100   if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork));
101   lhh = pipefgmres->orthogwork;
102 
103   /* Number of pseudo iterations since last restart is the number
104      of prestart directions */
105   loc_it = 0;
106 
107   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
108      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
109      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
110      (loc_it -1) is passed, so the two are equivalent */
111   pipefgmres->it = (loc_it - 1);
112 
113   /* initial residual is in VEC_VV(0)  - compute its norm*/
114   PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm));
115 
116   /* first entry in right-hand-side of hessenberg system is just
117      the initial residual norm */
118   *RS(0) = res_norm;
119 
120   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
121   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
122   else ksp->rnorm = 0;
123   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
124   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
125   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
126 
127   /* check for the convergence - maybe the current guess is good enough */
128   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
129   if (ksp->reason) {
130     if (itcount) *itcount = 0;
131     PetscFunctionReturn(PETSC_SUCCESS);
132   }
133 
134   /* scale VEC_VV (the initial residual) */
135   PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm));
136 
137   /* Fill the pipeline */
138   PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it)));
139   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
140   PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it)));
141   PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */
142 
143   /* MAIN ITERATION LOOP BEGINNING*/
144   /* keep iterating until we have converged OR generated the max number
145      of directions OR reached the max number of iterations for the method */
146   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
147     if (loc_it) {
148       PetscCall(KSPLogResidualHistory(ksp, res_norm));
149       PetscCall(KSPMonitor(ksp, ksp->its, res_norm));
150     }
151     pipefgmres->it = (loc_it - 1);
152 
153     /* see if more space is needed for work vectors */
154     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
155       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1));
156       /* (loc_it+1) is passed in as number of the first vector that should
157          be allocated */
158     }
159 
160     /* Note that these inner products are with "Z" now, so
161        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
162        not the value from the equivalent FGMRES run (even in exact arithmetic)
163        That is, the H we need for the Arnoldi relation is different from the
164        coefficients we use in the orthogonalization process,because of the shift */
165 
166     /* Do some local twiddling to allow for a single reduction */
167     for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i);
168     redux[loc_it + 1] = ZVEC(loc_it);
169 
170     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
171     PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh));
172 
173     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
174     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
175 
176     /* The work to be overlapped with the inner products follows.
177        This is application of the preconditioner and the operator
178        to compute intermediate quantites which will be combined (locally)
179        with the results of the inner products.
180        */
181     PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q));
182     PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
183     PetscCall(KSP_MatMult(ksp, Amat, Q, W));
184 
185     /* Compute inner products of the new direction with previous directions,
186        and the norm of the to-be-orthogonalized direction "Z".
187        This information is enough to build the required entries
188        of H. The inner product with VEC_VV(it_loc) is
189        *different* than in the standard FGMRES and need to be dealt with specially.
190        That is, for standard FGMRES the orthogonalization coefficients are the same
191        as the coefficients used in the Arnoldi relation to reconstruct, but here this
192        is not true (albeit only for the one entry of H which we "unshift" below. */
193 
194     /* Finish the dot product, retrieving the extra entry */
195     PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh));
196     tt = PetscRealPart(lhh[loc_it + 1]);
197 
198     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
199       Note that the Hessenberg entries require a shift, as these are for the
200       relation AU = VH, which is wrt unshifted basis vectors */
201     hh  = HH(0, loc_it);
202     hes = HES(0, loc_it);
203     for (j = 0; j < loc_it; j++) {
204       hh[j]  = lhh[j];
205       hes[j] = lhh[j];
206     }
207     hh[loc_it]  = lhh[loc_it] + shift;
208     hes[loc_it] = lhh[loc_it] + shift;
209 
210     /* we delay applying the shift here */
211     for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ }
212 
213     /* Compute the norm of the un-normalized new direction using the rearranged formula
214        Note that these are shifted ("barred") quantities */
215     for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
216     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
217     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0;
218     if (tt < 0.0) {
219       /* If we detect square root breakdown in the norm, we must restart the algorithm.
220          Here this means we simply break the current loop and reconstruct the solution
221          using the basis we have computed thus far. Note that by breaking immediately,
222          we do not update the iteration count, so computation done in this iteration
223          should be disregarded.
224          */
225       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt));
226       break;
227     } else {
228       tt = PetscSqrtReal(tt);
229     }
230 
231     /* new entry in hessenburg is the 2-norm of our new direction */
232     hh[loc_it + 1]  = tt;
233     hes[loc_it + 1] = tt;
234 
235     /* The recurred computation for the new direction
236        The division by tt is delayed to the happy breakdown check later
237        Note placement BEFORE the unshift
238        */
239     PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1)));
240     PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0)));
241     /* (VEC_VV(loc_it+1) is not normalized yet) */
242 
243     /* The recurred computation for the preconditioned vector (u) */
244     PetscCall(VecCopy(Q, PREVEC(loc_it + 1)));
245     PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0)));
246     if (tt) PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt));
247 
248     /* Unshift an entry in the GS coefficients ("removing the bar") */
249     lhh[loc_it] -= shift;
250 
251     /* The recurred computation for z (Au)
252        Note placement AFTER the "unshift" */
253     PetscCall(VecCopy(W, ZVEC(loc_it + 1)));
254     PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0)));
255     if (tt) PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt));
256 
257     /* Happy Breakdown Check */
258     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
259     /* RS(loc_it) contains the res_norm from the last iteration  */
260     hapbnd = PetscMin(pipefgmres->haptol, hapbnd);
261     if (tt > hapbnd) {
262       /* scale new direction by its norm  */
263       PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt));
264     } else {
265       /* This happens when the solution is exactly reached. */
266       /* So there is no new direction... */
267       PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */
268       hapend = PETSC_TRUE;
269     }
270     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
271        current solution would not be exact if HES was singular.  Note that
272        HH non-singular implies that HES is not singular, and HES is guaranteed
273        to be nonsingular when PREVECS are linearly independent and A is
274        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
275        of HES). So we should really add a check to verify that HES is nonsingular.*/
276 
277     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
278        here to check that the hessenberg matrix is indeed non-singular (since
279        FGMRES does not guarantee this) */
280 
281     /* Now apply rotations to new col of hessenberg (and right side of system),
282        calculate new rotation, and get new residual norm at the same time*/
283     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm));
284     if (ksp->reason) break;
285 
286     loc_it++;
287     pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */
288 
289     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
290     ksp->its++;
291     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
292     else ksp->rnorm = 0;
293     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
294 
295     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
296 
297     /* Catch error in happy breakdown and signal convergence and break from loop */
298     if (hapend) {
299       if (!ksp->reason) {
300         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm);
301         ksp->reason = KSP_DIVERGED_BREAKDOWN;
302         break;
303       }
304     }
305   }
306   /* END OF ITERATION LOOP */
307   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
308 
309   /*
310      Monitor if we know that we will not return for a restart */
311   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
312 
313   if (itcount) *itcount = loc_it;
314 
315   /*
316     Down here we have to solve for the "best" coefficients of the Krylov
317     columns, add the solution values together, and possibly unwind the
318     preconditioning from the solution
319    */
320 
321   /* Form the solution (or the solution so far) */
322   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
323      properly navigates */
324 
325   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1));
326 
327   PetscFunctionReturn(PETSC_SUCCESS);
328 }
329 
330 /*
331     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
332 
333    Input Parameter:
334 .     ksp - the Krylov space object that was set to use pipefgmres
335 
336    Output Parameter:
337 .     outits - number of iterations used
338 
339 */
340 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
341 {
342   PetscInt        its, itcount;
343   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
344   PetscBool       guess_zero = ksp->guess_zero;
345 
346   PetscFunctionBegin;
347   /* We have not checked these routines for use with complex numbers. The inner products
348      are likely not defined correctly for that case */
349   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
350 
351   PetscCall(PetscCitationsRegister(citation, &cited));
352 
353   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
354   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
355   ksp->its = 0;
356   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
357 
358   itcount     = 0;
359   ksp->reason = KSP_CONVERGED_ITERATING;
360   while (!ksp->reason) {
361     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
362     PetscCall(KSPPIPEFGMRESCycle(&its, ksp));
363     itcount += its;
364     if (itcount >= ksp->max_it) {
365       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
366       break;
367     }
368     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
369   }
370   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
371   PetscFunctionReturn(PETSC_SUCCESS);
372 }
373 
374 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
375 {
376   PetscFunctionBegin;
377   PetscCall(KSPReset_PIPEFGMRES(ksp));
378   PetscCall(KSPDestroy_GMRES(ksp));
379   PetscFunctionReturn(PETSC_SUCCESS);
380 }
381 
382 /*
383     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
384                       current iterates.
385 
386     Input parameters:
387         nrs - work area of size it + 1.
388         vguess  - index of initial guess
389         vdest - index of result.  Note that vguess may == vdest (replace
390                 guess with the solution).
391         it - HH upper triangular part is a block of size (it+1) x (it+1)
392 
393      This is an internal routine that knows about the PIPEFGMRES internals.
394  */
395 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it)
396 {
397   PetscScalar     tt;
398   PetscInt        k, j;
399   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
400 
401   PetscFunctionBegin;
402   /* Solve for solution vector that minimizes the residual */
403 
404   if (it < 0) {                        /* no pipefgmres steps have been performed */
405     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
406     PetscFunctionReturn(PETSC_SUCCESS);
407   }
408 
409   /* solve the upper triangular system - RS is the right side and HH is
410      the upper triangular matrix  - put soln in nrs */
411   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
412   else nrs[it] = 0.0;
413 
414   for (k = it - 1; k >= 0; k--) {
415     tt = *RS(k);
416     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
417     nrs[k] = tt / *HH(k, k);
418   }
419 
420   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
421   PetscCall(VecZeroEntries(VEC_TEMP));
422   PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &PREVEC(0)));
423 
424   /* add solution to previous solution */
425   if (vdest == vguess) {
426     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
427   } else {
428     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
429   }
430   PetscFunctionReturn(PETSC_SUCCESS);
431 }
432 
433 /*
434 
435     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
436                             Return new residual.
437 
438     input parameters:
439 
440 .        ksp -    Krylov space object
441 .        it  -    plane rotations are applied to the (it+1)th column of the
442                   modified hessenberg (i.e. HH(:,it))
443 .        hapend - PETSC_FALSE not happy breakdown ending.
444 
445     output parameters:
446 .        res - the new residual
447 
448  */
449 /*
450 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
451  */
452 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res)
453 {
454   PetscScalar    *hh, *cc, *ss, *rs;
455   PetscInt        j;
456   PetscReal       hapbnd;
457   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
458 
459   PetscFunctionBegin;
460   hh = HH(0, it); /* pointer to beginning of column to update */
461   cc = CC(0);     /* beginning of cosine rotations */
462   ss = SS(0);     /* beginning of sine rotations */
463   rs = RS(0);     /* right hand side of least squares system */
464 
465   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
466   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
467 
468   /* check for the happy breakdown */
469   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol);
470   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
471     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
472     *hapend = PETSC_TRUE;
473   }
474 
475   /* Apply all the previously computed plane rotations to the new column
476      of the Hessenberg matrix */
477   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
478      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
479 
480   for (j = 0; j < it; j++) {
481     PetscScalar hhj = hh[j];
482     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
483     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
484   }
485 
486   /*
487     compute the new plane rotation, and apply it to:
488      1) the right-hand-side of the Hessenberg system (RS)
489         note: it affects RS(it) and RS(it+1)
490      2) the new column of the Hessenberg matrix
491         note: it affects HH(it,it) which is currently pointed to
492         by hh and HH(it+1, it) (*(hh+1))
493     thus obtaining the updated value of the residual...
494   */
495 
496   /* compute new plane rotation */
497 
498   if (!*hapend) {
499     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
500     if (delta == 0.0) {
501       ksp->reason = KSP_DIVERGED_NULL;
502       PetscFunctionReturn(PETSC_SUCCESS);
503     }
504 
505     cc[it] = hh[it] / delta;     /* new cosine value */
506     ss[it] = hh[it + 1] / delta; /* new sine value */
507 
508     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
509     rs[it + 1] = -ss[it] * rs[it];
510     rs[it]     = PetscConj(cc[it]) * rs[it];
511     *res       = PetscAbsScalar(rs[it + 1]);
512   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
513             another rotation matrix (so RH doesn't change).  The new residual is
514             always the new sine term times the residual from last time (RS(it)),
515             but now the new sine rotation would be zero...so the residual should
516             be zero...so we will multiply "zero" by the last residual.  This might
517             not be exactly what we want to do here -could just return "zero". */
518 
519     *res = 0.0;
520   }
521   PetscFunctionReturn(PETSC_SUCCESS);
522 }
523 
524 /*
525    KSPBuildSolution_PIPEFGMRES
526 
527      Input Parameter:
528 .     ksp - the Krylov space object
529 .     ptr-
530 
531    Output Parameter:
532 .     result - the solution
533 
534    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
535    calls directly.
536 
537 */
538 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result)
539 {
540   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
541 
542   PetscFunctionBegin;
543   if (!ptr) {
544     if (!pipefgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp));
545     ptr = pipefgmres->sol_temp;
546   }
547   if (!pipefgmres->nrs) {
548     /* allocate the work area */
549     PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs));
550   }
551 
552   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it));
553   if (result) *result = ptr;
554   PetscFunctionReturn(PETSC_SUCCESS);
555 }
556 
557 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
558 {
559   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
560   PetscBool       flg;
561   PetscScalar     shift;
562 
563   PetscFunctionBegin;
564   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
565   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options");
566   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg));
567   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift));
568   PetscOptionsHeadEnd();
569   PetscFunctionReturn(PETSC_SUCCESS);
570 }
571 
572 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer)
573 {
574   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
575   PetscBool       iascii, isstring;
576 
577   PetscFunctionBegin;
578   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
579   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
580 
581   if (iascii) {
582     PetscCall(PetscViewerASCIIPrintf(viewer, "  restart=%" PetscInt_FMT "\n", pipefgmres->max_k));
583     PetscCall(PetscViewerASCIIPrintf(viewer, "  happy breakdown tolerance %g\n", (double)pipefgmres->haptol));
584 #if defined(PETSC_USE_COMPLEX)
585     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
586 #else
587     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g\n", (double)pipefgmres->shift));
588 #endif
589   } else if (isstring) {
590     PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k));
591 #if defined(PETSC_USE_COMPLEX)
592     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
593 #else
594     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g\n", (double)pipefgmres->shift));
595 #endif
596   }
597   PetscFunctionReturn(PETSC_SUCCESS);
598 }
599 
600 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
601 {
602   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
603   PetscInt        i;
604 
605   PetscFunctionBegin;
606   PetscCall(PetscFree(pipefgmres->prevecs));
607   PetscCall(PetscFree(pipefgmres->zvecs));
608   for (i = 0; i < pipefgmres->nwork_alloc; i++) {
609     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i]));
610     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i]));
611   }
612   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
613   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
614   PetscCall(PetscFree(pipefgmres->redux));
615   PetscCall(KSPReset_GMRES(ksp));
616   PetscFunctionReturn(PETSC_SUCCESS);
617 }
618 
619 /*MC
620    KSPPIPEFGMRES - Implements the Pipelined (1-stage) Flexible Generalized Minimal Residual method. [](sec_pipelineksp). [](sec_flexibleksp)
621 
622    Options Database Keys:
623 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
624 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
625 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
626 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
627                              vectors are allocated as needed)
628 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
629 
630    Level: intermediate
631 
632    Notes:
633    This variant is not "explicitly normalized" like `KSPPGMRES`, and requires a shift parameter.
634 
635    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
636 
637    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with `KSPFGMRES`).
638 
639    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
640    See [](doc_faq_pipelined)
641 
642    Developer Note:
643     This class is subclassed off of `KSPGMRES`.
644 
645    Contributed by:
646    P. Sanan and S.M. Schnepp
647 
648    Reference:
649     P. Sanan, S.M. Schnepp, and D.A. May,
650     "Pipelined, Flexible Krylov Subspace Methods,"
651     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
652     DOI: 10.1137/15M1049130
653 
654 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
655           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
656 M*/
657 
658 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
659 {
660   KSP_PIPEFGMRES *pipefgmres;
661 
662   PetscFunctionBegin;
663   PetscCall(PetscNew(&pipefgmres));
664 
665   ksp->data                              = (void *)pipefgmres;
666   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
667   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
668   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
669   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
670   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
671   ksp->ops->view                         = KSPView_PIPEFGMRES;
672   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
673   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
674   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
675 
676   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3));
677   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
678 
679   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
680   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
681   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
682 
683   pipefgmres->nextra_vecs    = 1;
684   pipefgmres->haptol         = 1.0e-30;
685   pipefgmres->q_preallocate  = 0;
686   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
687   pipefgmres->orthog         = NULL;
688   pipefgmres->nrs            = NULL;
689   pipefgmres->sol_temp       = NULL;
690   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
691   pipefgmres->Rsvd           = NULL;
692   pipefgmres->orthogwork     = NULL;
693   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
694   pipefgmres->shift          = 1.0;
695   PetscFunctionReturn(PETSC_SUCCESS);
696 }
697 
698 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it)
699 {
700   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
701   PetscInt        nwork      = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
702   PetscInt        nalloc;                               /* number to allocate */
703   PetscInt        k;
704 
705   PetscFunctionBegin;
706   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
707                                       in a single chunk */
708 
709   /* Adjust the number to allocate to make sure that we don't exceed the
710      number of available slots (pipefgmres->vecs_allocated)*/
711   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
712   if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
713 
714   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
715 
716   /* work vectors */
717   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL));
718   for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k];
719   /* specify size of chunk allocated */
720   pipefgmres->mwork_alloc[nwork] = nalloc;
721 
722   /* preconditioned vectors (note we don't use VEC_OFFSET) */
723   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL));
724   for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k];
725 
726   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL));
727   for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k];
728 
729   /* increment the number of work vector chunks */
730   pipefgmres->nwork_alloc++;
731   PetscFunctionReturn(PETSC_SUCCESS);
732 }
733 
734 /*@
735   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined `KSPPIPEFGMRES` solver.
736 
737   Logically Collective
738 
739   Input Parameters:
740 +  ksp - the Krylov space context
741 -  shift - the shift
742 
743   Options Database Key:
744 .  -ksp_pipefgmres_shift <shift> - set the shift parameter
745 
746   Level: intermediate
747 
748   Note:
749   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator.
750   This can be achieved with PETSc itself by using a few iterations of a Krylov method.
751   See `KSPComputeEigenvalues()` (and note the caveats there).
752 
753 .seealso: [](ch_ksp), `KSPPIPEFGMRES`, `KSPComputeEigenvalues()`
754 @*/
755 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift)
756 {
757   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
758 
759   PetscFunctionBegin;
760   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
761   PetscValidLogicalCollectiveScalar(ksp, shift, 2);
762   pipefgmres->shift = shift;
763   PetscFunctionReturn(PETSC_SUCCESS);
764 }
765