xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 6075d8dfa5b9941c458d491b72f7ff6db26efedf)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited      = PETSC_FALSE;
8 static const char citation[] = "@article{SSM2016,\n"
9                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
10                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
11                                "  journal = {SIAM Journal on Scientific Computing},\n"
12                                "  volume = {38},\n"
13                                "  number = {5},\n"
14                                "  pages = {C441-C470},\n"
15                                "  year = {2016},\n"
16                                "  doi = {10.1137/15M1049130},\n"
17                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
18                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
19                                "}\n";
20 
21 #define PIPEFGMRES_DELTA_DIRECTIONS 10
22 #define PIPEFGMRES_DEFAULT_MAXK     30
23 
24 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt);
25 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
26 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
27 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
28 
29 /*
30 
31     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
32 
33     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
34     but can be called directly by KSPSetUp().
35 
36 */
37 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp) {
38   PetscInt        k;
39   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
40   const PetscInt  max_k      = pipefgmres->max_k;
41 
42   PetscFunctionBegin;
43   PetscCall(KSPSetUp_GMRES(ksp));
44 
45   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs));
46   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work));
47 
48   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL));
49   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
50 
51   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs));
52   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work));
53 
54   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux));
55 
56   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL));
57   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
58 
59   PetscFunctionReturn(0);
60 }
61 
62 /*
63 
64     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
65                   history if requested.
66 
67     input parameters:
68 .        pipefgmres  - structure containing parameters and work areas
69 
70     output parameters:
71 .        itcount - number of iterations used.  If null, ignored.
72 .        converged - 0 if not converged
73 
74     Notes:
75     On entry, the value in vector VEC_VV(0) should be
76     the initial residual.
77 
78 */
79 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp) {
80   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
81   PetscReal       res_norm;
82   PetscReal       hapbnd, tt;
83   PetscScalar    *hh, *hes, *lhh, shift = pipefgmres->shift;
84   PetscBool       hapend = PETSC_FALSE;      /* indicates happy breakdown ending */
85   PetscInt        loc_it;                    /* local count of # of dir. in Krylov space */
86   PetscInt        max_k = pipefgmres->max_k; /* max # of directions Krylov space */
87   PetscInt        i, j, k;
88   Mat             Amat, Pmat;
89   Vec             Q, W;                      /* Pipelining vectors */
90   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
91 
92   PetscFunctionBegin;
93   if (itcount) *itcount = 0;
94 
95   /* Assign simpler names to these vectors, allocated as pipelining workspace */
96   Q = VEC_Q;
97   W = VEC_W;
98 
99   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
100   /* Note that we add an extra value here to allow for a single reduction */
101   if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork));
102   lhh = pipefgmres->orthogwork;
103 
104   /* Number of pseudo iterations since last restart is the number
105      of prestart directions */
106   loc_it = 0;
107 
108   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
109      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
110      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
111      (loc_it -1) is passed, so the two are equivalent */
112   pipefgmres->it = (loc_it - 1);
113 
114   /* initial residual is in VEC_VV(0)  - compute its norm*/
115   PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm));
116 
117   /* first entry in right-hand-side of hessenberg system is just
118      the initial residual norm */
119   *RS(0) = res_norm;
120 
121   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
122   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
123   else ksp->rnorm = 0;
124   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
125   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
126   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
127 
128   /* check for the convergence - maybe the current guess is good enough */
129   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
130   if (ksp->reason) {
131     if (itcount) *itcount = 0;
132     PetscFunctionReturn(0);
133   }
134 
135   /* scale VEC_VV (the initial residual) */
136   PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm));
137 
138   /* Fill the pipeline */
139   PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it)));
140   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
141   PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it)));
142   PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */
143 
144   /* MAIN ITERATION LOOP BEGINNING*/
145   /* keep iterating until we have converged OR generated the max number
146      of directions OR reached the max number of iterations for the method */
147   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
148     if (loc_it) {
149       PetscCall(KSPLogResidualHistory(ksp, res_norm));
150       PetscCall(KSPMonitor(ksp, ksp->its, res_norm));
151     }
152     pipefgmres->it = (loc_it - 1);
153 
154     /* see if more space is needed for work vectors */
155     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
156       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1));
157       /* (loc_it+1) is passed in as number of the first vector that should
158          be allocated */
159     }
160 
161     /* Note that these inner products are with "Z" now, so
162        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
163        not the value from the equivalent FGMRES run (even in exact arithmetic)
164        That is, the H we need for the Arnoldi relation is different from the
165        coefficients we use in the orthogonalization process,because of the shift */
166 
167     /* Do some local twiddling to allow for a single reduction */
168     for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i);
169     redux[loc_it + 1] = ZVEC(loc_it);
170 
171     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
172     PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh));
173 
174     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
175     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
176 
177     /* The work to be overlapped with the inner products follows.
178        This is application of the preconditioner and the operator
179        to compute intermediate quantites which will be combined (locally)
180        with the results of the inner products.
181        */
182     PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q));
183     PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
184     PetscCall(KSP_MatMult(ksp, Amat, Q, W));
185 
186     /* Compute inner products of the new direction with previous directions,
187        and the norm of the to-be-orthogonalized direction "Z".
188        This information is enough to build the required entries
189        of H. The inner product with VEC_VV(it_loc) is
190        *different* than in the standard FGMRES and need to be dealt with specially.
191        That is, for standard FGMRES the orthogonalization coefficients are the same
192        as the coefficients used in the Arnoldi relation to reconstruct, but here this
193        is not true (albeit only for the one entry of H which we "unshift" below. */
194 
195     /* Finish the dot product, retrieving the extra entry */
196     PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh));
197     tt = PetscRealPart(lhh[loc_it + 1]);
198 
199     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
200       Note that the Hessenberg entries require a shift, as these are for the
201       relation AU = VH, which is wrt unshifted basis vectors */
202     hh  = HH(0, loc_it);
203     hes = HES(0, loc_it);
204     for (j = 0; j < loc_it; j++) {
205       hh[j]  = lhh[j];
206       hes[j] = lhh[j];
207     }
208     hh[loc_it]  = lhh[loc_it] + shift;
209     hes[loc_it] = lhh[loc_it] + shift;
210 
211     /* we delay applying the shift here */
212     for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ }
213 
214     /* Compute the norm of the un-normalized new direction using the rearranged formula
215        Note that these are shifted ("barred") quantities */
216     for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
217     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
218     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0;
219     if (tt < 0.0) {
220       /* If we detect square root breakdown in the norm, we must restart the algorithm.
221          Here this means we simply break the current loop and reconstruct the solution
222          using the basis we have computed thus far. Note that by breaking immediately,
223          we do not update the iteration count, so computation done in this iteration
224          should be disregarded.
225          */
226       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt));
227       break;
228     } else {
229       tt = PetscSqrtReal(tt);
230     }
231 
232     /* new entry in hessenburg is the 2-norm of our new direction */
233     hh[loc_it + 1]  = tt;
234     hes[loc_it + 1] = tt;
235 
236     /* The recurred computation for the new direction
237        The division by tt is delayed to the happy breakdown check later
238        Note placement BEFORE the unshift
239        */
240     PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1)));
241     PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0)));
242     /* (VEC_VV(loc_it+1) is not normalized yet) */
243 
244     /* The recurred computation for the preconditioned vector (u) */
245     PetscCall(VecCopy(Q, PREVEC(loc_it + 1)));
246     PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0)));
247     PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt));
248 
249     /* Unshift an entry in the GS coefficients ("removing the bar") */
250     lhh[loc_it] -= shift;
251 
252     /* The recurred computation for z (Au)
253        Note placement AFTER the "unshift" */
254     PetscCall(VecCopy(W, ZVEC(loc_it + 1)));
255     PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0)));
256     PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt));
257 
258     /* Happy Breakdown Check */
259     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
260     /* RS(loc_it) contains the res_norm from the last iteration  */
261     hapbnd = PetscMin(pipefgmres->haptol, hapbnd);
262     if (tt > hapbnd) {
263       /* scale new direction by its norm  */
264       PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt));
265     } else {
266       /* This happens when the solution is exactly reached. */
267       /* So there is no new direction... */
268       PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */
269       hapend = PETSC_TRUE;
270     }
271     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
272        current solution would not be exact if HES was singular.  Note that
273        HH non-singular implies that HES is not singular, and HES is guaranteed
274        to be nonsingular when PREVECS are linearly independent and A is
275        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
276        of HES). So we should really add a check to verify that HES is nonsingular.*/
277 
278     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
279        here to check that the hessenberg matrix is indeed non-singular (since
280        FGMRES does not guarantee this) */
281 
282     /* Now apply rotations to new col of hessenberg (and right side of system),
283        calculate new rotation, and get new residual norm at the same time*/
284     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm));
285     if (ksp->reason) break;
286 
287     loc_it++;
288     pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */
289 
290     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
291     ksp->its++;
292     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
293     else ksp->rnorm = 0;
294     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
295 
296     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
297 
298     /* Catch error in happy breakdown and signal convergence and break from loop */
299     if (hapend) {
300       if (!ksp->reason) {
301         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "You reached the happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm);
302         ksp->reason = KSP_DIVERGED_BREAKDOWN;
303         break;
304       }
305     }
306   }
307   /* END OF ITERATION LOOP */
308   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
309 
310   /*
311      Monitor if we know that we will not return for a restart */
312   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
313 
314   if (itcount) *itcount = loc_it;
315 
316   /*
317     Down here we have to solve for the "best" coefficients of the Krylov
318     columns, add the solution values together, and possibly unwind the
319     preconditioning from the solution
320    */
321 
322   /* Form the solution (or the solution so far) */
323   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
324      properly navigates */
325 
326   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1));
327 
328   PetscFunctionReturn(0);
329 }
330 
331 /*
332     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
333 
334    Input Parameter:
335 .     ksp - the Krylov space object that was set to use pipefgmres
336 
337    Output Parameter:
338 .     outits - number of iterations used
339 
340 */
341 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp) {
342   PetscInt        its, itcount;
343   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
344   PetscBool       guess_zero = ksp->guess_zero;
345 
346   PetscFunctionBegin;
347   /* We have not checked these routines for use with complex numbers. The inner products
348      are likely not defined correctly for that case */
349   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
350 
351   PetscCall(PetscCitationsRegister(citation, &cited));
352 
353   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
354   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
355   ksp->its = 0;
356   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
357 
358   itcount     = 0;
359   ksp->reason = KSP_CONVERGED_ITERATING;
360   while (!ksp->reason) {
361     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
362     PetscCall(KSPPIPEFGMRESCycle(&its, ksp));
363     itcount += its;
364     if (itcount >= ksp->max_it) {
365       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
366       break;
367     }
368     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
369   }
370   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
371   PetscFunctionReturn(0);
372 }
373 
374 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp) {
375   PetscFunctionBegin;
376   PetscCall(KSPReset_PIPEFGMRES(ksp));
377   PetscCall(KSPDestroy_GMRES(ksp));
378   PetscFunctionReturn(0);
379 }
380 
381 /*
382     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
383                       current iterates.
384 
385     Input parameters:
386         nrs - work area of size it + 1.
387         vguess  - index of initial guess
388         vdest - index of result.  Note that vguess may == vdest (replace
389                 guess with the solution).
390         it - HH upper triangular part is a block of size (it+1) x (it+1)
391 
392      This is an internal routine that knows about the PIPEFGMRES internals.
393  */
394 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it) {
395   PetscScalar     tt;
396   PetscInt        k, j;
397   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
398 
399   PetscFunctionBegin;
400   /* Solve for solution vector that minimizes the residual */
401 
402   if (it < 0) {                        /* no pipefgmres steps have been performed */
403     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
404     PetscFunctionReturn(0);
405   }
406 
407   /* solve the upper triangular system - RS is the right side and HH is
408      the upper triangular matrix  - put soln in nrs */
409   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
410   else nrs[it] = 0.0;
411 
412   for (k = it - 1; k >= 0; k--) {
413     tt = *RS(k);
414     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
415     nrs[k] = tt / *HH(k, k);
416   }
417 
418   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
419   PetscCall(VecZeroEntries(VEC_TEMP));
420   PetscCall(VecMAXPY(VEC_TEMP, it + 1, nrs, &PREVEC(0)));
421 
422   /* add solution to previous solution */
423   if (vdest == vguess) {
424     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
425   } else {
426     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 /*
432 
433     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
434                             Return new residual.
435 
436     input parameters:
437 
438 .        ksp -    Krylov space object
439 .        it  -    plane rotations are applied to the (it+1)th column of the
440                   modified hessenberg (i.e. HH(:,it))
441 .        hapend - PETSC_FALSE not happy breakdown ending.
442 
443     output parameters:
444 .        res - the new residual
445 
446  */
447 /*
448 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
449  */
450 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res) {
451   PetscScalar    *hh, *cc, *ss, *rs;
452   PetscInt        j;
453   PetscReal       hapbnd;
454   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
455 
456   PetscFunctionBegin;
457   hh = HH(0, it); /* pointer to beginning of column to update */
458   cc = CC(0);     /* beginning of cosine rotations */
459   ss = SS(0);     /* beginning of sine rotations */
460   rs = RS(0);     /* right hand side of least squares system */
461 
462   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
463   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
464 
465   /* check for the happy breakdown */
466   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol);
467   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
468     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
469     *hapend = PETSC_TRUE;
470   }
471 
472   /* Apply all the previously computed plane rotations to the new column
473      of the Hessenberg matrix */
474   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
475      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
476 
477   for (j = 0; j < it; j++) {
478     PetscScalar hhj = hh[j];
479     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
480     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
481   }
482 
483   /*
484     compute the new plane rotation, and apply it to:
485      1) the right-hand-side of the Hessenberg system (RS)
486         note: it affects RS(it) and RS(it+1)
487      2) the new column of the Hessenberg matrix
488         note: it affects HH(it,it) which is currently pointed to
489         by hh and HH(it+1, it) (*(hh+1))
490     thus obtaining the updated value of the residual...
491   */
492 
493   /* compute new plane rotation */
494 
495   if (!*hapend) {
496     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
497     if (delta == 0.0) {
498       ksp->reason = KSP_DIVERGED_NULL;
499       PetscFunctionReturn(0);
500     }
501 
502     cc[it] = hh[it] / delta;     /* new cosine value */
503     ss[it] = hh[it + 1] / delta; /* new sine value */
504 
505     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
506     rs[it + 1] = -ss[it] * rs[it];
507     rs[it]     = PetscConj(cc[it]) * rs[it];
508     *res       = PetscAbsScalar(rs[it + 1]);
509   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
510             another rotation matrix (so RH doesn't change).  The new residual is
511             always the new sine term times the residual from last time (RS(it)),
512             but now the new sine rotation would be zero...so the residual should
513             be zero...so we will multiply "zero" by the last residual.  This might
514             not be exactly what we want to do here -could just return "zero". */
515 
516     *res = 0.0;
517   }
518   PetscFunctionReturn(0);
519 }
520 
521 /*
522    KSPBuildSolution_PIPEFGMRES
523 
524      Input Parameter:
525 .     ksp - the Krylov space object
526 .     ptr-
527 
528    Output Parameter:
529 .     result - the solution
530 
531    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
532    calls directly.
533 
534 */
535 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result) {
536   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
537 
538   PetscFunctionBegin;
539   if (!ptr) {
540     if (!pipefgmres->sol_temp) { PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp)); }
541     ptr = pipefgmres->sol_temp;
542   }
543   if (!pipefgmres->nrs) {
544     /* allocate the work area */
545     PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs));
546   }
547 
548   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it));
549   if (result) *result = ptr;
550   PetscFunctionReturn(0);
551 }
552 
553 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject) {
554   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
555   PetscBool       flg;
556   PetscScalar     shift;
557 
558   PetscFunctionBegin;
559   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
560   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options");
561   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg));
562   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift));
563   PetscOptionsHeadEnd();
564   PetscFunctionReturn(0);
565 }
566 
567 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer) {
568   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
569   PetscBool       iascii, isstring;
570 
571   PetscFunctionBegin;
572   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
573   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
574 
575   if (iascii) {
576     PetscCall(PetscViewerASCIIPrintf(viewer, "  restart=%" PetscInt_FMT "\n", pipefgmres->max_k));
577     PetscCall(PetscViewerASCIIPrintf(viewer, "  happy breakdown tolerance %g\n", (double)pipefgmres->haptol));
578 #if defined(PETSC_USE_COMPLEX)
579     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
580 #else
581     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g\n", (double)pipefgmres->shift));
582 #endif
583   } else if (isstring) {
584     PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k));
585 #if defined(PETSC_USE_COMPLEX)
586     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
587 #else
588     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g\n", (double)pipefgmres->shift));
589 #endif
590   }
591   PetscFunctionReturn(0);
592 }
593 
594 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp) {
595   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
596   PetscInt        i;
597 
598   PetscFunctionBegin;
599   PetscCall(PetscFree(pipefgmres->prevecs));
600   PetscCall(PetscFree(pipefgmres->zvecs));
601   for (i = 0; i < pipefgmres->nwork_alloc; i++) {
602     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i]));
603     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i]));
604   }
605   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
606   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
607   PetscCall(PetscFree(pipefgmres->redux));
608   PetscCall(KSPReset_GMRES(ksp));
609   PetscFunctionReturn(0);
610 }
611 
612 /*MC
613    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
614 
615    A flexible, 1-stage pipelined variant of GMRES.
616 
617    Options Database Keys:
618 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
619 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
620 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
621 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
622                              vectors are allocated as needed)
623 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
624 
625    Level: intermediate
626 
627    Notes:
628 
629    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
630 
631    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
632 
633    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
634    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
635    See the FAQ on the PETSc website for details.
636 
637    Developer Notes:
638     This class is subclassed off of KSPGMRES.
639 
640    Reference:
641     P. Sanan, S.M. Schnepp, and D.A. May,
642     "Pipelined, Flexible Krylov Subspace Methods,"
643     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
644     DOI: 10.1137/15M1049130
645 
646 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
647           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
648 M*/
649 
650 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp) {
651   KSP_PIPEFGMRES *pipefgmres;
652 
653   PetscFunctionBegin;
654   PetscCall(PetscNew(&pipefgmres));
655 
656   ksp->data                              = (void *)pipefgmres;
657   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
658   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
659   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
660   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
661   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
662   ksp->ops->view                         = KSPView_PIPEFGMRES;
663   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
664   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
665   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
666 
667   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3));
668   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
669 
670   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
671   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
672   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
673 
674   pipefgmres->nextra_vecs    = 1;
675   pipefgmres->haptol         = 1.0e-30;
676   pipefgmres->q_preallocate  = 0;
677   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
678   pipefgmres->orthog         = NULL;
679   pipefgmres->nrs            = NULL;
680   pipefgmres->sol_temp       = NULL;
681   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
682   pipefgmres->Rsvd           = NULL;
683   pipefgmres->orthogwork     = NULL;
684   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
685   pipefgmres->shift          = 1.0;
686   PetscFunctionReturn(0);
687 }
688 
689 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it) {
690   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
691   PetscInt        nwork      = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
692   PetscInt        nalloc;                               /* number to allocate */
693   PetscInt        k;
694 
695   PetscFunctionBegin;
696   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
697                                       in a single chunk */
698 
699   /* Adjust the number to allocate to make sure that we don't exceed the
700      number of available slots (pipefgmres->vecs_allocated)*/
701   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
702   if (!nalloc) PetscFunctionReturn(0);
703 
704   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
705 
706   /* work vectors */
707   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL));
708   for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k];
709   /* specify size of chunk allocated */
710   pipefgmres->mwork_alloc[nwork] = nalloc;
711 
712   /* preconditioned vectors (note we don't use VEC_OFFSET) */
713   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL));
714   for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k];
715 
716   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL));
717   for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k];
718 
719   /* increment the number of work vector chunks */
720   pipefgmres->nwork_alloc++;
721   PetscFunctionReturn(0);
722 }
723 
724 /*@
725   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
726 
727   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
728 
729 Logically Collective on ksp
730 
731 Input Parameters:
732 +  ksp - the Krylov space context
733 -  shift - the shift
734 
735 Level: intermediate
736 
737 Options Database:
738 . -ksp_pipefgmres_shift <shift> - set the shift parameter
739 
740 .seealso: `KSPComputeEigenvalues()`
741 @*/
742 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift) {
743   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
744 
745   PetscFunctionBegin;
746   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
747   PetscValidLogicalCollectiveScalar(ksp, shift, 2);
748   pipefgmres->shift = shift;
749   PetscFunctionReturn(0);
750 }
751