xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>       /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited = PETSC_FALSE;
8 static const char citation[] =
9   "@article{SSM2016,\n"
10   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12   "  journal = {SIAM Journal on Scientific Computing},\n"
13   "  volume = {38},\n"
14   "  number = {5},\n"
15   "  pages = {C441-C470},\n"
16   "  year = {2016},\n"
17   "  doi = {10.1137/15M1049130},\n"
18   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20   "}\n";
21 
22 #define PIPEFGMRES_DELTA_DIRECTIONS 10
23 #define PIPEFGMRES_DEFAULT_MAXK     30
24 
25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
29 
30 /*
31 
32     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
33 
34     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
35     but can be called directly by KSPSetUp().
36 
37 */
38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
39 {
40   PetscErrorCode ierr;
41   PetscInt       k;
42   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
43   const PetscInt max_k = pipefgmres->max_k;
44 
45   PetscFunctionBegin;
46   ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr);
47 
48   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr);
49   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr);
50   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
51 
52   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr);
53   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr);
54   for (k=0; k < pipefgmres->vv_allocated; k++) {
55     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
56   }
57 
58   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr);
59   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr);
60   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
61 
62   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr);
63   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr);
64 
65   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr);
66   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr);
67   for (k=0; k < pipefgmres->vv_allocated; k++) {
68     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
69   }
70 
71   PetscFunctionReturn(0);
72 }
73 
74 /*
75 
76     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
77                   history if requested.
78 
79     input parameters:
80 .        pipefgmres  - structure containing parameters and work areas
81 
82     output parameters:
83 .        itcount - number of iterations used.  If null, ignored.
84 .        converged - 0 if not converged
85 
86     Notes:
87     On entry, the value in vector VEC_VV(0) should be
88     the initial residual.
89 
90 
91 */
92 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
93 {
94   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
95   PetscReal      res_norm;
96   PetscReal      hapbnd,tt;
97   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
98   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
99   PetscErrorCode ierr;
100   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
101   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
102   PetscInt       i,j,k;
103   Mat            Amat,Pmat;
104   Vec            Q,W; /* Pipelining vectors */
105   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
106 
107   PetscFunctionBegin;
108   if (itcount) *itcount = 0;
109 
110   /* Assign simpler names to these vectors, allocated as pipelining workspace */
111   Q = VEC_Q;
112   W = VEC_W;
113 
114   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
115   /* Note that we add an extra value here to allow for a single reduction */
116   if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr);
117   }
118   lhh = pipefgmres->orthogwork;
119 
120   /* Number of pseudo iterations since last restart is the number
121      of prestart directions */
122   loc_it = 0;
123 
124   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
125      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
126      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
127      (loc_it -1) is passed, so the two are equivalent */
128   pipefgmres->it = (loc_it - 1);
129 
130   /* initial residual is in VEC_VV(0)  - compute its norm*/
131   ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
132 
133   /* first entry in right-hand-side of hessenberg system is just
134      the initial residual norm */
135   *RS(0) = res_norm;
136 
137   ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
138   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
139   else ksp->rnorm = 0;
140   ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
141   ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr);
142   ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr);
143 
144   /* check for the convergence - maybe the current guess is good enough */
145   ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
146   if (ksp->reason) {
147     if (itcount) *itcount = 0;
148     PetscFunctionReturn(0);
149   }
150 
151   /* scale VEC_VV (the initial residual) */
152   ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr);
153 
154   /* Fill the pipeline */
155   ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr);
156   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
157   ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr);
158   ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */
159 
160   /* MAIN ITERATION LOOP BEGINNING*/
161   /* keep iterating until we have converged OR generated the max number
162      of directions OR reached the max number of iterations for the method */
163   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
164     if (loc_it) {
165       ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
166       ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
167     }
168     pipefgmres->it = (loc_it - 1);
169 
170     /* see if more space is needed for work vectors */
171     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
172       ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
173       /* (loc_it+1) is passed in as number of the first vector that should
174          be allocated */
175     }
176 
177     /* Note that these inner products are with "Z" now, so
178        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
179        not the value from the equivalent FGMRES run (even in exact arithmetic)
180        That is, the H we need for the Arnoldi relation is different from the
181        coefficients we use in the orthogonalization process,because of the shift */
182 
183     /* Do some local twiddling to allow for a single reduction */
184     for (i=0;i<loc_it+1;i++){
185       redux[i] = VEC_VV(i);
186     }
187     redux[loc_it+1] = ZVEC(loc_it);
188 
189     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
190     ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
191 
192     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
193     ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr);
194 
195     /* The work to be overlapped with the inner products follows.
196        This is application of the preconditioner and the operator
197        to compute intermediate quantites which will be combined (locally)
198        with the results of the inner products.
199        */
200     ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr);
201     ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
202     ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr);
203 
204     /* Compute inner products of the new direction with previous directions,
205        and the norm of the to-be-orthogonalized direction "Z".
206        This information is enough to build the required entries
207        of H. The inner product with VEC_VV(it_loc) is
208        *different* than in the standard FGMRES and need to be dealt with specially.
209        That is, for standard FGMRES the orthogonalization coefficients are the same
210        as the coefficients used in the Arnoldi relation to reconstruct, but here this
211        is not true (albeit only for the one entry of H which we "unshift" below. */
212 
213     /* Finish the dot product, retrieving the extra entry */
214     ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
215     tt = PetscRealPart(lhh[loc_it+1]);
216 
217     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
218       Note that the Hessenberg entries require a shift, as these are for the
219       relation AU = VH, which is wrt unshifted basis vectors */
220     hh = HH(0,loc_it); hes=HES(0,loc_it);
221     for (j=0; j<loc_it; j++) {
222       hh[j]  = lhh[j];
223       hes[j] = lhh[j];
224     }
225     hh[loc_it]  = lhh[loc_it] + shift;
226     hes[loc_it] = lhh[loc_it] + shift;
227 
228     /* we delay applying the shift here */
229     for (j=0; j<=loc_it; j++) {
230       lhh[j]        = -lhh[j]; /* flip sign */
231     }
232 
233     /* Compute the norm of the un-normalized new direction using the rearranged formula
234        Note that these are shifted ("barred") quantities */
235     for (k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
236     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
237     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ;
238     if (tt < 0.0) {
239       /* If we detect square root breakdown in the norm, we must restart the algorithm.
240          Here this means we simply break the current loop and reconstruct the solution
241          using the basis we have computed thus far. Note that by breaking immediately,
242          we do not update the iteration count, so computation done in this iteration
243          should be disregarded.
244          */
245       ierr = PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);CHKERRQ(ierr);
246       break;
247     } else {
248       tt = PetscSqrtReal(tt);
249     }
250 
251     /* new entry in hessenburg is the 2-norm of our new direction */
252     hh[loc_it+1]  = tt;
253     hes[loc_it+1] = tt;
254 
255     /* The recurred computation for the new direction
256        The division by tt is delayed to the happy breakdown check later
257        Note placement BEFORE the unshift
258        */
259     ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr);
260     ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr);
261     /* (VEC_VV(loc_it+1) is not normalized yet) */
262 
263     /* The recurred computation for the preconditioned vector (u) */
264     ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr);
265     ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr);
266     ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
267 
268     /* Unshift an entry in the GS coefficients ("removing the bar") */
269     lhh[loc_it]         -= shift;
270 
271     /* The recurred computation for z (Au)
272        Note placement AFTER the "unshift" */
273     ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr);
274     ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr);
275     ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
276 
277     /* Happy Breakdown Check */
278     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
279     /* RS(loc_it) contains the res_norm from the last iteration  */
280     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
281     if (tt > hapbnd) {
282       /* scale new direction by its norm  */
283       ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr);
284     } else {
285       /* This happens when the solution is exactly reached. */
286       /* So there is no new direction... */
287       ierr   = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);     /* set VEC_TEMP to 0 */
288       hapend = PETSC_TRUE;
289     }
290     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
291        current solution would not be exact if HES was singular.  Note that
292        HH non-singular implies that HES is not singular, and HES is guaranteed
293        to be nonsingular when PREVECS are linearly independent and A is
294        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
295        of HES). So we should really add a check to verify that HES is nonsingular.*/
296 
297     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
298        here to check that the hessenberg matrix is indeed non-singular (since
299        FGMRES does not guarantee this) */
300 
301     /* Now apply rotations to new col of hessenberg (and right side of system),
302        calculate new rotation, and get new residual norm at the same time*/
303     ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr);
304     if (ksp->reason) break;
305 
306     loc_it++;
307     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */
308 
309     ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
310     ksp->its++;
311     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
312     else ksp->rnorm = 0;
313     ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
314 
315     ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
316 
317     /* Catch error in happy breakdown and signal convergence and break from loop */
318     if (hapend) {
319       if (!ksp->reason) {
320         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
321         else {
322           ksp->reason = KSP_DIVERGED_BREAKDOWN;
323           break;
324         }
325       }
326     }
327   }
328   /* END OF ITERATION LOOP */
329   ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr);
330 
331   /*
332      Monitor if we know that we will not return for a restart */
333   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
334     ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr);
335   }
336 
337   if (itcount) *itcount = loc_it;
338 
339   /*
340     Down here we have to solve for the "best" coefficients of the Krylov
341     columns, add the solution values together, and possibly unwind the
342     preconditioning from the solution
343    */
344 
345   /* Form the solution (or the solution so far) */
346   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
347      properly navigates */
348 
349   ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);
350 
351   PetscFunctionReturn(0);
352 }
353 
354 /*
355     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
356 
357 
358    Input Parameter:
359 .     ksp - the Krylov space object that was set to use pipefgmres
360 
361    Output Parameter:
362 .     outits - number of iterations used
363 
364 */
365 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
366 {
367   PetscErrorCode ierr;
368   PetscInt       its,itcount;
369   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
370   PetscBool      guess_zero = ksp->guess_zero;
371 
372   PetscFunctionBegin;
373   /* We have not checked these routines for use with complex numbers. The inner products
374      are likely not defined correctly for that case */
375   if (PetscDefined(PETSC_USE_COMPLEX) && !PetscDefined(PETSC_SKIP_COMPLEX)) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
376 
377   ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr);
378 
379   if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
380   ierr     = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
381   ksp->its = 0;
382   ierr     = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
383 
384   itcount     = 0;
385   ksp->reason = KSP_CONVERGED_ITERATING;
386   while (!ksp->reason) {
387     ierr     = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr);
388     ierr     = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr);
389     itcount += its;
390     if (itcount >= ksp->max_it) {
391       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
392       break;
393     }
394     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
395   }
396   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
397   PetscFunctionReturn(0);
398 }
399 
400 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
401 {
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr);
406   ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 /*
411     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
412                       current iterates.
413 
414     Input parameters:
415         nrs - work area of size it + 1.
416         vguess  - index of initial guess
417         vdest - index of result.  Note that vguess may == vdest (replace
418                 guess with the solution).
419         it - HH upper triangular part is a block of size (it+1) x (it+1)
420 
421      This is an internal routine that knows about the PIPEFGMRES internals.
422  */
423 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
424 {
425   PetscScalar    tt;
426   PetscErrorCode ierr;
427   PetscInt       k,j;
428   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
429 
430   PetscFunctionBegin;
431   /* Solve for solution vector that minimizes the residual */
432 
433   if (it < 0) {                                 /* no pipefgmres steps have been performed */
434     ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */
435     PetscFunctionReturn(0);
436   }
437 
438   /* solve the upper triangular system - RS is the right side and HH is
439      the upper triangular matrix  - put soln in nrs */
440   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
441   else nrs[it] = 0.0;
442 
443   for (k=it-1; k>=0; k--) {
444     tt = *RS(k);
445     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
446     nrs[k] = tt / *HH(k,k);
447   }
448 
449   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
450   ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr);
451   ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr);
452 
453   /* add solution to previous solution */
454   if (vdest == vguess) {
455     ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
456   } else {
457     ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr);
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 /*
463 
464     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
465                             Return new residual.
466 
467     input parameters:
468 
469 .        ksp -    Krylov space object
470 .        it  -    plane rotations are applied to the (it+1)th column of the
471                   modified hessenberg (i.e. HH(:,it))
472 .        hapend - PETSC_FALSE not happy breakdown ending.
473 
474     output parameters:
475 .        res - the new residual
476 
477  */
478 /*
479 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
480  */
481 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
482 {
483   PetscScalar    *hh,*cc,*ss,*rs;
484   PetscInt       j;
485   PetscReal      hapbnd;
486   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   hh = HH(0,it);   /* pointer to beginning of column to update */
491   cc = CC(0);      /* beginning of cosine rotations */
492   ss = SS(0);      /* beginning of sine rotations */
493   rs = RS(0);      /* right hand side of least squares system */
494 
495   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
496   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];
497 
498   /* check for the happy breakdown */
499   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
500   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
501     ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
502     *hapend = PETSC_TRUE;
503   }
504 
505   /* Apply all the previously computed plane rotations to the new column
506      of the Hessenberg matrix */
507   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
508      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
509 
510   for (j=0; j<it; j++) {
511     PetscScalar hhj = hh[j];
512     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
513     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
514   }
515 
516   /*
517     compute the new plane rotation, and apply it to:
518      1) the right-hand-side of the Hessenberg system (RS)
519         note: it affects RS(it) and RS(it+1)
520      2) the new column of the Hessenberg matrix
521         note: it affects HH(it,it) which is currently pointed to
522         by hh and HH(it+1, it) (*(hh+1))
523     thus obtaining the updated value of the residual...
524   */
525 
526   /* compute new plane rotation */
527 
528   if (!*hapend) {
529     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
530     if (delta == 0.0) {
531       ksp->reason = KSP_DIVERGED_NULL;
532       PetscFunctionReturn(0);
533     }
534 
535     cc[it] = hh[it] / delta;    /* new cosine value */
536     ss[it] = hh[it+1] / delta;  /* new sine value */
537 
538     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
539     rs[it+1] = -ss[it]*rs[it];
540     rs[it]   = PetscConj(cc[it])*rs[it];
541     *res     = PetscAbsScalar(rs[it+1]);
542   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
543             another rotation matrix (so RH doesn't change).  The new residual is
544             always the new sine term times the residual from last time (RS(it)),
545             but now the new sine rotation would be zero...so the residual should
546             be zero...so we will multiply "zero" by the last residual.  This might
547             not be exactly what we want to do here -could just return "zero". */
548 
549     *res = 0.0;
550   }
551   PetscFunctionReturn(0);
552 }
553 
554 /*
555    KSPBuildSolution_PIPEFGMRES
556 
557      Input Parameter:
558 .     ksp - the Krylov space object
559 .     ptr-
560 
561    Output Parameter:
562 .     result - the solution
563 
564    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
565    calls directly.
566 
567 */
568 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
569 {
570   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
571   PetscErrorCode ierr;
572 
573   PetscFunctionBegin;
574   if (!ptr) {
575     if (!pipefgmres->sol_temp) {
576       ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr);
577       ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr);
578     }
579     ptr = pipefgmres->sol_temp;
580   }
581   if (!pipefgmres->nrs) {
582     /* allocate the work area */
583     ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr);
584     ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr);
585   }
586 
587   ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr);
588   if (result) *result = ptr;
589   PetscFunctionReturn(0);
590 }
591 
592 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
593 {
594   PetscErrorCode ierr;
595   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
596   PetscBool      flg;
597   PetscScalar    shift;
598 
599   PetscFunctionBegin;
600   ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr);
601   ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr);
602   ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr);
603   if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); }
604   ierr = PetscOptionsTail();CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
609 {
610   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
611   PetscErrorCode ierr;
612   PetscBool      iascii,isstring;
613 
614   PetscFunctionBegin;
615   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
616   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
617 
618   if (iascii) {
619     ierr = PetscViewerASCIIPrintf(viewer,"  restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr);
620     ierr = PetscViewerASCIIPrintf(viewer,"  happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr);
621 #if defined(PETSC_USE_COMPLEX)
622     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
623 #else
624     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
625 #endif
626   } else if (isstring) {
627     ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr);
628 #if defined(PETSC_USE_COMPLEX)
629     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
630 #else
631     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
632 #endif
633   }
634   PetscFunctionReturn(0);
635 }
636 
637 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
638 {
639   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
640   PetscErrorCode   ierr;
641   PetscInt         i;
642 
643   PetscFunctionBegin;
644   ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr);
645   ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr);
646   for (i=0; i<pipefgmres->nwork_alloc; i++) {
647     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr);
648     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr);
649   }
650   ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr);
651   ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr);
652   ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr);
653   ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 /*MC
658    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
659 
660    A flexible, 1-stage pipelined variant of GMRES.
661 
662    Options Database Keys:
663 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
664 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
665 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
666 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
667                              vectors are allocated as needed)
668 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
669 
670 
671    Level: intermediate
672 
673    Notes:
674 
675    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
676 
677    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
678 
679    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
680    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
681    See the FAQ on the PETSc website for details.
682 
683    Developer Notes:
684     This class is subclassed off of KSPGMRES.
685 
686    Reference:
687     P. Sanan, S.M. Schnepp, and D.A. May,
688     "Pipelined, Flexible Krylov Subspace Methods,"
689     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
690     DOI: 10.1137/15M1049130
691 
692 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES
693            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift()
694 M*/
695 
696 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
697 {
698   KSP_PIPEFGMRES *pipefgmres;
699   PetscErrorCode ierr;
700 
701   PetscFunctionBegin;
702   ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr);
703 
704   ksp->data                              = (void*)pipefgmres;
705   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
706   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
707   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
708   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
709   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
710   ksp->ops->view                         = KSPView_PIPEFGMRES;
711   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
712   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
713   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
714 
715   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr);
716   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr);
717 
718   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr);
719   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr);
720   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr);
721 
722   pipefgmres->nextra_vecs    = 1;
723   pipefgmres->haptol         = 1.0e-30;
724   pipefgmres->q_preallocate  = 0;
725   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
726   pipefgmres->orthog         = NULL;
727   pipefgmres->nrs            = NULL;
728   pipefgmres->sol_temp       = NULL;
729   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
730   pipefgmres->Rsvd           = NULL;
731   pipefgmres->orthogwork     = NULL;
732   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
733   pipefgmres->shift          = 1.0;
734   PetscFunctionReturn(0);
735 }
736 
737 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
738 {
739   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
740   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
741   PetscInt       nalloc;                            /* number to allocate */
742   PetscErrorCode ierr;
743   PetscInt       k;
744 
745   PetscFunctionBegin;
746   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
747                                       in a single chunk */
748 
749   /* Adjust the number to allocate to make sure that we don't exceed the
750      number of available slots (pipefgmres->vecs_allocated)*/
751   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
752     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
753   }
754   if (!nalloc) PetscFunctionReturn(0);
755 
756   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
757 
758   /* work vectors */
759   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr);
760   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr);
761   for (k=0; k < nalloc; k++) {
762     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
763   }
764   /* specify size of chunk allocated */
765   pipefgmres->mwork_alloc[nwork] = nalloc;
766 
767   /* preconditioned vectors (note we don't use VEC_OFFSET) */
768   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
769   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr);
770   for (k=0; k < nalloc; k++) {
771     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
772   }
773 
774   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
775   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr);
776   for (k=0; k < nalloc; k++) {
777     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
778   }
779 
780   /* increment the number of work vector chunks */
781   pipefgmres->nwork_alloc++;
782   PetscFunctionReturn(0);
783 }
784 
785 /*@
786   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
787 
788   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
789 
790 Logically Collective on ksp
791 
792 Input Parameters:
793 +  ksp - the Krylov space context
794 -  shift - the shift
795 
796 Level: intermediate
797 
798 Options Database:
799 . -ksp_pipefgmres_shift <shift>
800 
801 .seealso: KSPComputeEigenvalues()
802 @*/
803 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
804 {
805   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
806 
807   PetscFunctionBegin;
808   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
809   PetscValidLogicalCollectiveScalar(ksp,shift,2);
810   pipefgmres->shift = shift;
811   PetscFunctionReturn(0);
812 }
813