xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision b94d7ded0a05f1bbd5e48daa6f92b28259c75b44)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>       /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited = PETSC_FALSE;
8 static const char citation[] =
9   "@article{SSM2016,\n"
10   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12   "  journal = {SIAM Journal on Scientific Computing},\n"
13   "  volume = {38},\n"
14   "  number = {5},\n"
15   "  pages = {C441-C470},\n"
16   "  year = {2016},\n"
17   "  doi = {10.1137/15M1049130},\n"
18   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20   "}\n";
21 
22 #define PIPEFGMRES_DELTA_DIRECTIONS 10
23 #define PIPEFGMRES_DEFAULT_MAXK     30
24 
25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
29 
30 /*
31 
32     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
33 
34     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
35     but can be called directly by KSPSetUp().
36 
37 */
38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
39 {
40   PetscInt       k;
41   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
42   const PetscInt max_k = pipefgmres->max_k;
43 
44   PetscFunctionBegin;
45   PetscCall(KSPSetUp_GMRES(ksp));
46 
47   PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs));
48   PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work));
49   PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*))));
50 
51   PetscCall(KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL));
52   PetscCall(PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]));
53   for (k=0; k < pipefgmres->vv_allocated; k++) {
54     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
55   }
56 
57   PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs));
58   PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work));
59   PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*))));
60 
61   PetscCall(PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux));
62   PetscCall(PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*))));
63 
64   PetscCall(KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL));
65   PetscCall(PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]));
66   for (k=0; k < pipefgmres->vv_allocated; k++) {
67     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
68   }
69 
70   PetscFunctionReturn(0);
71 }
72 
73 /*
74 
75     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
76                   history if requested.
77 
78     input parameters:
79 .        pipefgmres  - structure containing parameters and work areas
80 
81     output parameters:
82 .        itcount - number of iterations used.  If null, ignored.
83 .        converged - 0 if not converged
84 
85     Notes:
86     On entry, the value in vector VEC_VV(0) should be
87     the initial residual.
88 
89 */
90 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
91 {
92   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
93   PetscReal      res_norm;
94   PetscReal      hapbnd,tt;
95   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
96   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
97   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
98   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
99   PetscInt       i,j,k;
100   Mat            Amat,Pmat;
101   Vec            Q,W; /* Pipelining vectors */
102   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
103 
104   PetscFunctionBegin;
105   if (itcount) *itcount = 0;
106 
107   /* Assign simpler names to these vectors, allocated as pipelining workspace */
108   Q = VEC_Q;
109   W = VEC_W;
110 
111   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
112   /* Note that we add an extra value here to allow for a single reduction */
113   if (!pipefgmres->orthogwork) { PetscCall(PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork));
114   }
115   lhh = pipefgmres->orthogwork;
116 
117   /* Number of pseudo iterations since last restart is the number
118      of prestart directions */
119   loc_it = 0;
120 
121   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
122      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
123      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
124      (loc_it -1) is passed, so the two are equivalent */
125   pipefgmres->it = (loc_it - 1);
126 
127   /* initial residual is in VEC_VV(0)  - compute its norm*/
128   PetscCall(VecNorm(VEC_VV(0),NORM_2,&res_norm));
129 
130   /* first entry in right-hand-side of hessenberg system is just
131      the initial residual norm */
132   *RS(0) = res_norm;
133 
134   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
135   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
136   else ksp->rnorm = 0;
137   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
138   PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm));
139   PetscCall(KSPMonitor(ksp,ksp->its,ksp->rnorm));
140 
141   /* check for the convergence - maybe the current guess is good enough */
142   PetscCall((*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP));
143   if (ksp->reason) {
144     if (itcount) *itcount = 0;
145     PetscFunctionReturn(0);
146   }
147 
148   /* scale VEC_VV (the initial residual) */
149   PetscCall(VecScale(VEC_VV(0),1.0/res_norm));
150 
151   /* Fill the pipeline */
152   PetscCall(KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it)));
153   PetscCall(PCGetOperators(ksp->pc,&Amat,&Pmat));
154   PetscCall(KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it)));
155   PetscCall(VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it))); /* Note shift */
156 
157   /* MAIN ITERATION LOOP BEGINNING*/
158   /* keep iterating until we have converged OR generated the max number
159      of directions OR reached the max number of iterations for the method */
160   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
161     if (loc_it) {
162       PetscCall(KSPLogResidualHistory(ksp,res_norm));
163       PetscCall(KSPMonitor(ksp,ksp->its,res_norm));
164     }
165     pipefgmres->it = (loc_it - 1);
166 
167     /* see if more space is needed for work vectors */
168     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
169       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1));
170       /* (loc_it+1) is passed in as number of the first vector that should
171          be allocated */
172     }
173 
174     /* Note that these inner products are with "Z" now, so
175        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
176        not the value from the equivalent FGMRES run (even in exact arithmetic)
177        That is, the H we need for the Arnoldi relation is different from the
178        coefficients we use in the orthogonalization process,because of the shift */
179 
180     /* Do some local twiddling to allow for a single reduction */
181     for (i=0;i<loc_it+1;i++) {
182       redux[i] = VEC_VV(i);
183     }
184     redux[loc_it+1] = ZVEC(loc_it);
185 
186     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
187     PetscCall(VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh));
188 
189     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
190     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
191 
192     /* The work to be overlapped with the inner products follows.
193        This is application of the preconditioner and the operator
194        to compute intermediate quantites which will be combined (locally)
195        with the results of the inner products.
196        */
197     PetscCall(KSP_PCApply(ksp,ZVEC(loc_it),Q));
198     PetscCall(PCGetOperators(ksp->pc,&Amat,&Pmat));
199     PetscCall(KSP_MatMult(ksp,Amat,Q,W));
200 
201     /* Compute inner products of the new direction with previous directions,
202        and the norm of the to-be-orthogonalized direction "Z".
203        This information is enough to build the required entries
204        of H. The inner product with VEC_VV(it_loc) is
205        *different* than in the standard FGMRES and need to be dealt with specially.
206        That is, for standard FGMRES the orthogonalization coefficients are the same
207        as the coefficients used in the Arnoldi relation to reconstruct, but here this
208        is not true (albeit only for the one entry of H which we "unshift" below. */
209 
210     /* Finish the dot product, retrieving the extra entry */
211     PetscCall(VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh));
212     tt = PetscRealPart(lhh[loc_it+1]);
213 
214     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
215       Note that the Hessenberg entries require a shift, as these are for the
216       relation AU = VH, which is wrt unshifted basis vectors */
217     hh = HH(0,loc_it); hes=HES(0,loc_it);
218     for (j=0; j<loc_it; j++) {
219       hh[j]  = lhh[j];
220       hes[j] = lhh[j];
221     }
222     hh[loc_it]  = lhh[loc_it] + shift;
223     hes[loc_it] = lhh[loc_it] + shift;
224 
225     /* we delay applying the shift here */
226     for (j=0; j<=loc_it; j++) {
227       lhh[j]        = -lhh[j]; /* flip sign */
228     }
229 
230     /* Compute the norm of the un-normalized new direction using the rearranged formula
231        Note that these are shifted ("barred") quantities */
232     for (k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
233     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
234     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ;
235     if (tt < 0.0) {
236       /* If we detect square root breakdown in the norm, we must restart the algorithm.
237          Here this means we simply break the current loop and reconstruct the solution
238          using the basis we have computed thus far. Note that by breaking immediately,
239          we do not update the iteration count, so computation done in this iteration
240          should be disregarded.
241          */
242       PetscCall(PetscInfo(ksp,"Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n",ksp->its,(double)tt));
243       break;
244     } else {
245       tt = PetscSqrtReal(tt);
246     }
247 
248     /* new entry in hessenburg is the 2-norm of our new direction */
249     hh[loc_it+1]  = tt;
250     hes[loc_it+1] = tt;
251 
252     /* The recurred computation for the new direction
253        The division by tt is delayed to the happy breakdown check later
254        Note placement BEFORE the unshift
255        */
256     PetscCall(VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1)));
257     PetscCall(VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0)));
258     /* (VEC_VV(loc_it+1) is not normalized yet) */
259 
260     /* The recurred computation for the preconditioned vector (u) */
261     PetscCall(VecCopy(Q,PREVEC(loc_it+1)));
262     PetscCall(VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0)));
263     PetscCall(VecScale(PREVEC(loc_it+1),1.0/tt));
264 
265     /* Unshift an entry in the GS coefficients ("removing the bar") */
266     lhh[loc_it]         -= shift;
267 
268     /* The recurred computation for z (Au)
269        Note placement AFTER the "unshift" */
270     PetscCall(VecCopy(W,ZVEC(loc_it+1)));
271     PetscCall(VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0)));
272     PetscCall(VecScale(ZVEC(loc_it+1),1.0/tt));
273 
274     /* Happy Breakdown Check */
275     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
276     /* RS(loc_it) contains the res_norm from the last iteration  */
277     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
278     if (tt > hapbnd) {
279       /* scale new direction by its norm  */
280       PetscCall(VecScale(VEC_VV(loc_it+1),1.0/tt));
281     } else {
282       /* This happens when the solution is exactly reached. */
283       /* So there is no new direction... */
284       PetscCall(VecSet(VEC_TEMP,0.0));     /* set VEC_TEMP to 0 */
285       hapend = PETSC_TRUE;
286     }
287     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
288        current solution would not be exact if HES was singular.  Note that
289        HH non-singular implies that HES is not singular, and HES is guaranteed
290        to be nonsingular when PREVECS are linearly independent and A is
291        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
292        of HES). So we should really add a check to verify that HES is nonsingular.*/
293 
294     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
295        here to check that the hessenberg matrix is indeed non-singular (since
296        FGMRES does not guarantee this) */
297 
298     /* Now apply rotations to new col of hessenberg (and right side of system),
299        calculate new rotation, and get new residual norm at the same time*/
300     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm));
301     if (ksp->reason) break;
302 
303     loc_it++;
304     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */
305 
306     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
307     ksp->its++;
308     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
309     else ksp->rnorm = 0;
310     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
311 
312     PetscCall((*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP));
313 
314     /* Catch error in happy breakdown and signal convergence and break from loop */
315     if (hapend) {
316       if (!ksp->reason) {
317         PetscCheck(!ksp->errorifnotconverged,PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
318         ksp->reason = KSP_DIVERGED_BREAKDOWN;
319         break;
320       }
321     }
322   }
323   /* END OF ITERATION LOOP */
324   PetscCall(KSPLogResidualHistory(ksp,ksp->rnorm));
325 
326   /*
327      Monitor if we know that we will not return for a restart */
328   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
329     PetscCall(KSPMonitor(ksp,ksp->its,ksp->rnorm));
330   }
331 
332   if (itcount) *itcount = loc_it;
333 
334   /*
335     Down here we have to solve for the "best" coefficients of the Krylov
336     columns, add the solution values together, and possibly unwind the
337     preconditioning from the solution
338    */
339 
340   /* Form the solution (or the solution so far) */
341   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
342      properly navigates */
343 
344   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1));
345 
346   PetscFunctionReturn(0);
347 }
348 
349 /*
350     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
351 
352    Input Parameter:
353 .     ksp - the Krylov space object that was set to use pipefgmres
354 
355    Output Parameter:
356 .     outits - number of iterations used
357 
358 */
359 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
360 {
361   PetscInt       its,itcount;
362   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
363   PetscBool      guess_zero = ksp->guess_zero;
364 
365   PetscFunctionBegin;
366   /* We have not checked these routines for use with complex numbers. The inner products
367      are likely not defined correctly for that case */
368   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX),PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
369 
370   PetscCall(PetscCitationsRegister(citation,&cited));
371 
372   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd,PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
373   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
374   ksp->its = 0;
375   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
376 
377   itcount     = 0;
378   ksp->reason = KSP_CONVERGED_ITERATING;
379   while (!ksp->reason) {
380     PetscCall(KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs));
381     PetscCall(KSPPIPEFGMRESCycle(&its,ksp));
382     itcount += its;
383     if (itcount >= ksp->max_it) {
384       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
385       break;
386     }
387     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
388   }
389   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
390   PetscFunctionReturn(0);
391 }
392 
393 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
394 {
395   PetscFunctionBegin;
396   PetscCall(KSPReset_PIPEFGMRES(ksp));
397   PetscCall(KSPDestroy_GMRES(ksp));
398   PetscFunctionReturn(0);
399 }
400 
401 /*
402     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
403                       current iterates.
404 
405     Input parameters:
406         nrs - work area of size it + 1.
407         vguess  - index of initial guess
408         vdest - index of result.  Note that vguess may == vdest (replace
409                 guess with the solution).
410         it - HH upper triangular part is a block of size (it+1) x (it+1)
411 
412      This is an internal routine that knows about the PIPEFGMRES internals.
413  */
414 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
415 {
416   PetscScalar    tt;
417   PetscInt       k,j;
418   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
419 
420   PetscFunctionBegin;
421   /* Solve for solution vector that minimizes the residual */
422 
423   if (it < 0) {                                 /* no pipefgmres steps have been performed */
424     PetscCall(VecCopy(vguess,vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
425     PetscFunctionReturn(0);
426   }
427 
428   /* solve the upper triangular system - RS is the right side and HH is
429      the upper triangular matrix  - put soln in nrs */
430   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
431   else nrs[it] = 0.0;
432 
433   for (k=it-1; k>=0; k--) {
434     tt = *RS(k);
435     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
436     nrs[k] = tt / *HH(k,k);
437   }
438 
439   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
440   PetscCall(VecZeroEntries(VEC_TEMP));
441   PetscCall(VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0)));
442 
443   /* add solution to previous solution */
444   if (vdest == vguess) {
445     PetscCall(VecAXPY(vdest,1.0,VEC_TEMP));
446   } else {
447     PetscCall(VecWAXPY(vdest,1.0,VEC_TEMP,vguess));
448   }
449   PetscFunctionReturn(0);
450 }
451 
452 /*
453 
454     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
455                             Return new residual.
456 
457     input parameters:
458 
459 .        ksp -    Krylov space object
460 .        it  -    plane rotations are applied to the (it+1)th column of the
461                   modified hessenberg (i.e. HH(:,it))
462 .        hapend - PETSC_FALSE not happy breakdown ending.
463 
464     output parameters:
465 .        res - the new residual
466 
467  */
468 /*
469 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
470  */
471 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
472 {
473   PetscScalar    *hh,*cc,*ss,*rs;
474   PetscInt       j;
475   PetscReal      hapbnd;
476   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
477 
478   PetscFunctionBegin;
479   hh = HH(0,it);   /* pointer to beginning of column to update */
480   cc = CC(0);      /* beginning of cosine rotations */
481   ss = SS(0);      /* beginning of sine rotations */
482   rs = RS(0);      /* right hand side of least squares system */
483 
484   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
485   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];
486 
487   /* check for the happy breakdown */
488   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
489   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
490     PetscCall(PetscInfo(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it))));
491     *hapend = PETSC_TRUE;
492   }
493 
494   /* Apply all the previously computed plane rotations to the new column
495      of the Hessenberg matrix */
496   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
497      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
498 
499   for (j=0; j<it; j++) {
500     PetscScalar hhj = hh[j];
501     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
502     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
503   }
504 
505   /*
506     compute the new plane rotation, and apply it to:
507      1) the right-hand-side of the Hessenberg system (RS)
508         note: it affects RS(it) and RS(it+1)
509      2) the new column of the Hessenberg matrix
510         note: it affects HH(it,it) which is currently pointed to
511         by hh and HH(it+1, it) (*(hh+1))
512     thus obtaining the updated value of the residual...
513   */
514 
515   /* compute new plane rotation */
516 
517   if (!*hapend) {
518     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
519     if (delta == 0.0) {
520       ksp->reason = KSP_DIVERGED_NULL;
521       PetscFunctionReturn(0);
522     }
523 
524     cc[it] = hh[it] / delta;    /* new cosine value */
525     ss[it] = hh[it+1] / delta;  /* new sine value */
526 
527     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
528     rs[it+1] = -ss[it]*rs[it];
529     rs[it]   = PetscConj(cc[it])*rs[it];
530     *res     = PetscAbsScalar(rs[it+1]);
531   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
532             another rotation matrix (so RH doesn't change).  The new residual is
533             always the new sine term times the residual from last time (RS(it)),
534             but now the new sine rotation would be zero...so the residual should
535             be zero...so we will multiply "zero" by the last residual.  This might
536             not be exactly what we want to do here -could just return "zero". */
537 
538     *res = 0.0;
539   }
540   PetscFunctionReturn(0);
541 }
542 
543 /*
544    KSPBuildSolution_PIPEFGMRES
545 
546      Input Parameter:
547 .     ksp - the Krylov space object
548 .     ptr-
549 
550    Output Parameter:
551 .     result - the solution
552 
553    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
554    calls directly.
555 
556 */
557 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
558 {
559   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
560 
561   PetscFunctionBegin;
562   if (!ptr) {
563     if (!pipefgmres->sol_temp) {
564       PetscCall(VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp));
565       PetscCall(PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp));
566     }
567     ptr = pipefgmres->sol_temp;
568   }
569   if (!pipefgmres->nrs) {
570     /* allocate the work area */
571     PetscCall(PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs));
572     PetscCall(PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar)));
573   }
574 
575   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it));
576   if (result) *result = ptr;
577   PetscFunctionReturn(0);
578 }
579 
580 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
581 {
582   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
583   PetscBool      flg;
584   PetscScalar    shift;
585 
586   PetscFunctionBegin;
587   PetscCall(KSPSetFromOptions_GMRES(PetscOptionsObject,ksp));
588   PetscOptionsHeadBegin(PetscOptionsObject,"KSP pipelined FGMRES Options");
589   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg));
590   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp,shift));
591   PetscOptionsHeadEnd();
592   PetscFunctionReturn(0);
593 }
594 
595 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
596 {
597   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
598   PetscBool      iascii,isstring;
599 
600   PetscFunctionBegin;
601   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
602   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring));
603 
604   if (iascii) {
605     PetscCall(PetscViewerASCIIPrintf(viewer,"  restart=%" PetscInt_FMT "\n",pipefgmres->max_k));
606     PetscCall(PetscViewerASCIIPrintf(viewer,"  happy breakdown tolerance %g\n",(double)pipefgmres->haptol));
607 #if defined(PETSC_USE_COMPLEX)
608     PetscCall(PetscViewerASCIIPrintf(viewer,"  shift=%g+%gi\n",(double)PetscRealPart(pipefgmres->shift),(double)PetscImaginaryPart(pipefgmres->shift)));
609 #else
610     PetscCall(PetscViewerASCIIPrintf(viewer,"  shift=%g\n",(double)pipefgmres->shift));
611 #endif
612   } else if (isstring) {
613     PetscCall(PetscViewerStringSPrintf(viewer,"restart %" PetscInt_FMT,pipefgmres->max_k));
614 #if defined(PETSC_USE_COMPLEX)
615     PetscCall(PetscViewerStringSPrintf(viewer,"   shift=%g+%gi\n",(double)PetscRealPart(pipefgmres->shift),(double)PetscImaginaryPart(pipefgmres->shift)));
616 #else
617     PetscCall(PetscViewerStringSPrintf(viewer,"   shift=%g\n",(double)pipefgmres->shift));
618 #endif
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
624 {
625   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
626   PetscInt         i;
627 
628   PetscFunctionBegin;
629   PetscCall(PetscFree(pipefgmres->prevecs));
630   PetscCall(PetscFree(pipefgmres->zvecs));
631   for (i=0; i<pipefgmres->nwork_alloc; i++) {
632     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]));
633     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]));
634   }
635   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
636   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
637   PetscCall(PetscFree(pipefgmres->redux));
638   PetscCall(KSPReset_GMRES(ksp));
639   PetscFunctionReturn(0);
640 }
641 
642 /*MC
643    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
644 
645    A flexible, 1-stage pipelined variant of GMRES.
646 
647    Options Database Keys:
648 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
649 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
650 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
651 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
652                              vectors are allocated as needed)
653 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
654 
655    Level: intermediate
656 
657    Notes:
658 
659    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
660 
661    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
662 
663    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
664    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
665    See the FAQ on the PETSc website for details.
666 
667    Developer Notes:
668     This class is subclassed off of KSPGMRES.
669 
670    Reference:
671     P. Sanan, S.M. Schnepp, and D.A. May,
672     "Pipelined, Flexible Krylov Subspace Methods,"
673     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
674     DOI: 10.1137/15M1049130
675 
676 .seealso: `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
677           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
678 M*/
679 
680 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
681 {
682   KSP_PIPEFGMRES *pipefgmres;
683 
684   PetscFunctionBegin;
685   PetscCall(PetscNewLog(ksp,&pipefgmres));
686 
687   ksp->data                              = (void*)pipefgmres;
688   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
689   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
690   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
691   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
692   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
693   ksp->ops->view                         = KSPView_PIPEFGMRES;
694   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
695   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
696   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
697 
698   PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3));
699   PetscCall(KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1));
700 
701   PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES));
702   PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES));
703   PetscCall(PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES));
704 
705   pipefgmres->nextra_vecs    = 1;
706   pipefgmres->haptol         = 1.0e-30;
707   pipefgmres->q_preallocate  = 0;
708   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
709   pipefgmres->orthog         = NULL;
710   pipefgmres->nrs            = NULL;
711   pipefgmres->sol_temp       = NULL;
712   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
713   pipefgmres->Rsvd           = NULL;
714   pipefgmres->orthogwork     = NULL;
715   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
716   pipefgmres->shift          = 1.0;
717   PetscFunctionReturn(0);
718 }
719 
720 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
721 {
722   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
723   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
724   PetscInt       nalloc;                            /* number to allocate */
725   PetscInt       k;
726 
727   PetscFunctionBegin;
728   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
729                                       in a single chunk */
730 
731   /* Adjust the number to allocate to make sure that we don't exceed the
732      number of available slots (pipefgmres->vecs_allocated)*/
733   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
734     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
735   }
736   if (!nalloc) PetscFunctionReturn(0);
737 
738   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
739 
740   /* work vectors */
741   PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL));
742   PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]));
743   for (k=0; k < nalloc; k++) {
744     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
745   }
746   /* specify size of chunk allocated */
747   pipefgmres->mwork_alloc[nwork] = nalloc;
748 
749   /* preconditioned vectors (note we don't use VEC_OFFSET) */
750   PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL));
751   PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]));
752   for (k=0; k < nalloc; k++) {
753     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
754   }
755 
756   PetscCall(KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL));
757   PetscCall(PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]));
758   for (k=0; k < nalloc; k++) {
759     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
760   }
761 
762   /* increment the number of work vector chunks */
763   pipefgmres->nwork_alloc++;
764   PetscFunctionReturn(0);
765 }
766 
767 /*@
768   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
769 
770   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
771 
772 Logically Collective on ksp
773 
774 Input Parameters:
775 +  ksp - the Krylov space context
776 -  shift - the shift
777 
778 Level: intermediate
779 
780 Options Database:
781 . -ksp_pipefgmres_shift <shift> - set the shift parameter
782 
783 .seealso: `KSPComputeEigenvalues()`
784 @*/
785 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
786 {
787   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
788 
789   PetscFunctionBegin;
790   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
791   PetscValidLogicalCollectiveScalar(ksp,shift,2);
792   pipefgmres->shift = shift;
793   PetscFunctionReturn(0);
794 }
795