xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision a2fddd78f770fa4fc19a8af67e65be331f27d92b)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>       /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited = PETSC_FALSE;
8 static const char citation[] =
9   "@article{SSM2016,\n"
10   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12   "  journal = {SIAM Journal on Scientific Computing},\n"
13   "  volume = {38},\n"
14   "  number = {5},\n"
15   "  pages = {C441-C470},\n"
16   "  year = {2016},\n"
17   "  doi = {10.1137/15M1049130},\n"
18   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20   "}\n";
21 
22 #define PIPEFGMRES_DELTA_DIRECTIONS 10
23 #define PIPEFGMRES_DEFAULT_MAXK     30
24 
25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
29 
30 /*
31 
32     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
33 
34     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
35     but can be called directly by KSPSetUp().
36 
37 */
38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
39 {
40   PetscErrorCode ierr;
41   PetscInt       k;
42   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
43   const PetscInt max_k = pipefgmres->max_k;
44 
45   PetscFunctionBegin;
46   ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr);
47 
48   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr);
49   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr);
50   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
51 
52   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr);
53   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr);
54   for (k=0; k < pipefgmres->vv_allocated; k++) {
55     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
56   }
57 
58   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr);
59   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr);
60   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
61 
62   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr);
63   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr);
64 
65   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr);
66   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr);
67   for (k=0; k < pipefgmres->vv_allocated; k++) {
68     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
69   }
70 
71   PetscFunctionReturn(0);
72 }
73 
74 /*
75 
76     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
77                   history if requested.
78 
79     input parameters:
80 .        pipefgmres  - structure containing parameters and work areas
81 
82     output parameters:
83 .        itcount - number of iterations used.  If null, ignored.
84 .        converged - 0 if not converged
85 
86     Notes:
87     On entry, the value in vector VEC_VV(0) should be
88     the initial residual.
89 
90 */
91 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
92 {
93   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
94   PetscReal      res_norm;
95   PetscReal      hapbnd,tt;
96   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
97   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
98   PetscErrorCode ierr;
99   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
100   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
101   PetscInt       i,j,k;
102   Mat            Amat,Pmat;
103   Vec            Q,W; /* Pipelining vectors */
104   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
105 
106   PetscFunctionBegin;
107   if (itcount) *itcount = 0;
108 
109   /* Assign simpler names to these vectors, allocated as pipelining workspace */
110   Q = VEC_Q;
111   W = VEC_W;
112 
113   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
114   /* Note that we add an extra value here to allow for a single reduction */
115   if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr);
116   }
117   lhh = pipefgmres->orthogwork;
118 
119   /* Number of pseudo iterations since last restart is the number
120      of prestart directions */
121   loc_it = 0;
122 
123   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
124      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
125      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
126      (loc_it -1) is passed, so the two are equivalent */
127   pipefgmres->it = (loc_it - 1);
128 
129   /* initial residual is in VEC_VV(0)  - compute its norm*/
130   ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
131 
132   /* first entry in right-hand-side of hessenberg system is just
133      the initial residual norm */
134   *RS(0) = res_norm;
135 
136   ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
137   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
138   else ksp->rnorm = 0;
139   ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
140   ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr);
141   ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr);
142 
143   /* check for the convergence - maybe the current guess is good enough */
144   ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
145   if (ksp->reason) {
146     if (itcount) *itcount = 0;
147     PetscFunctionReturn(0);
148   }
149 
150   /* scale VEC_VV (the initial residual) */
151   ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr);
152 
153   /* Fill the pipeline */
154   ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr);
155   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
156   ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr);
157   ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */
158 
159   /* MAIN ITERATION LOOP BEGINNING*/
160   /* keep iterating until we have converged OR generated the max number
161      of directions OR reached the max number of iterations for the method */
162   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
163     if (loc_it) {
164       ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
165       ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
166     }
167     pipefgmres->it = (loc_it - 1);
168 
169     /* see if more space is needed for work vectors */
170     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
171       ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
172       /* (loc_it+1) is passed in as number of the first vector that should
173          be allocated */
174     }
175 
176     /* Note that these inner products are with "Z" now, so
177        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
178        not the value from the equivalent FGMRES run (even in exact arithmetic)
179        That is, the H we need for the Arnoldi relation is different from the
180        coefficients we use in the orthogonalization process,because of the shift */
181 
182     /* Do some local twiddling to allow for a single reduction */
183     for (i=0;i<loc_it+1;i++) {
184       redux[i] = VEC_VV(i);
185     }
186     redux[loc_it+1] = ZVEC(loc_it);
187 
188     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
189     ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
190 
191     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
192     ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr);
193 
194     /* The work to be overlapped with the inner products follows.
195        This is application of the preconditioner and the operator
196        to compute intermediate quantites which will be combined (locally)
197        with the results of the inner products.
198        */
199     ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr);
200     ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
201     ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr);
202 
203     /* Compute inner products of the new direction with previous directions,
204        and the norm of the to-be-orthogonalized direction "Z".
205        This information is enough to build the required entries
206        of H. The inner product with VEC_VV(it_loc) is
207        *different* than in the standard FGMRES and need to be dealt with specially.
208        That is, for standard FGMRES the orthogonalization coefficients are the same
209        as the coefficients used in the Arnoldi relation to reconstruct, but here this
210        is not true (albeit only for the one entry of H which we "unshift" below. */
211 
212     /* Finish the dot product, retrieving the extra entry */
213     ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
214     tt = PetscRealPart(lhh[loc_it+1]);
215 
216     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
217       Note that the Hessenberg entries require a shift, as these are for the
218       relation AU = VH, which is wrt unshifted basis vectors */
219     hh = HH(0,loc_it); hes=HES(0,loc_it);
220     for (j=0; j<loc_it; j++) {
221       hh[j]  = lhh[j];
222       hes[j] = lhh[j];
223     }
224     hh[loc_it]  = lhh[loc_it] + shift;
225     hes[loc_it] = lhh[loc_it] + shift;
226 
227     /* we delay applying the shift here */
228     for (j=0; j<=loc_it; j++) {
229       lhh[j]        = -lhh[j]; /* flip sign */
230     }
231 
232     /* Compute the norm of the un-normalized new direction using the rearranged formula
233        Note that these are shifted ("barred") quantities */
234     for (k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
235     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
236     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ;
237     if (tt < 0.0) {
238       /* If we detect square root breakdown in the norm, we must restart the algorithm.
239          Here this means we simply break the current loop and reconstruct the solution
240          using the basis we have computed thus far. Note that by breaking immediately,
241          we do not update the iteration count, so computation done in this iteration
242          should be disregarded.
243          */
244       ierr = PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);CHKERRQ(ierr);
245       break;
246     } else {
247       tt = PetscSqrtReal(tt);
248     }
249 
250     /* new entry in hessenburg is the 2-norm of our new direction */
251     hh[loc_it+1]  = tt;
252     hes[loc_it+1] = tt;
253 
254     /* The recurred computation for the new direction
255        The division by tt is delayed to the happy breakdown check later
256        Note placement BEFORE the unshift
257        */
258     ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr);
259     ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr);
260     /* (VEC_VV(loc_it+1) is not normalized yet) */
261 
262     /* The recurred computation for the preconditioned vector (u) */
263     ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr);
264     ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr);
265     ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
266 
267     /* Unshift an entry in the GS coefficients ("removing the bar") */
268     lhh[loc_it]         -= shift;
269 
270     /* The recurred computation for z (Au)
271        Note placement AFTER the "unshift" */
272     ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr);
273     ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr);
274     ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
275 
276     /* Happy Breakdown Check */
277     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
278     /* RS(loc_it) contains the res_norm from the last iteration  */
279     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
280     if (tt > hapbnd) {
281       /* scale new direction by its norm  */
282       ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr);
283     } else {
284       /* This happens when the solution is exactly reached. */
285       /* So there is no new direction... */
286       ierr   = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);     /* set VEC_TEMP to 0 */
287       hapend = PETSC_TRUE;
288     }
289     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
290        current solution would not be exact if HES was singular.  Note that
291        HH non-singular implies that HES is not singular, and HES is guaranteed
292        to be nonsingular when PREVECS are linearly independent and A is
293        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
294        of HES). So we should really add a check to verify that HES is nonsingular.*/
295 
296     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
297        here to check that the hessenberg matrix is indeed non-singular (since
298        FGMRES does not guarantee this) */
299 
300     /* Now apply rotations to new col of hessenberg (and right side of system),
301        calculate new rotation, and get new residual norm at the same time*/
302     ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr);
303     if (ksp->reason) break;
304 
305     loc_it++;
306     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */
307 
308     ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
309     ksp->its++;
310     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
311     else ksp->rnorm = 0;
312     ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
313 
314     ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
315 
316     /* Catch error in happy breakdown and signal convergence and break from loop */
317     if (hapend) {
318       if (!ksp->reason) {
319         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
320         else {
321           ksp->reason = KSP_DIVERGED_BREAKDOWN;
322           break;
323         }
324       }
325     }
326   }
327   /* END OF ITERATION LOOP */
328   ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr);
329 
330   /*
331      Monitor if we know that we will not return for a restart */
332   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
333     ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr);
334   }
335 
336   if (itcount) *itcount = loc_it;
337 
338   /*
339     Down here we have to solve for the "best" coefficients of the Krylov
340     columns, add the solution values together, and possibly unwind the
341     preconditioning from the solution
342    */
343 
344   /* Form the solution (or the solution so far) */
345   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
346      properly navigates */
347 
348   ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);
349 
350   PetscFunctionReturn(0);
351 }
352 
353 /*
354     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
355 
356    Input Parameter:
357 .     ksp - the Krylov space object that was set to use pipefgmres
358 
359    Output Parameter:
360 .     outits - number of iterations used
361 
362 */
363 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
364 {
365   PetscErrorCode ierr;
366   PetscInt       its,itcount;
367   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
368   PetscBool      guess_zero = ksp->guess_zero;
369 
370   PetscFunctionBegin;
371   /* We have not checked these routines for use with complex numbers. The inner products
372      are likely not defined correctly for that case */
373   if (PetscDefined(USE_COMPLEX) && !PetscDefined(SKIP_COMPLEX)) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
374 
375   ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr);
376 
377   if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
378   ierr     = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
379   ksp->its = 0;
380   ierr     = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
381 
382   itcount     = 0;
383   ksp->reason = KSP_CONVERGED_ITERATING;
384   while (!ksp->reason) {
385     ierr     = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr);
386     ierr     = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr);
387     itcount += its;
388     if (itcount >= ksp->max_it) {
389       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
390       break;
391     }
392     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
393   }
394   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
395   PetscFunctionReturn(0);
396 }
397 
398 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
399 {
400   PetscErrorCode ierr;
401 
402   PetscFunctionBegin;
403   ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr);
404   ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr);
405   PetscFunctionReturn(0);
406 }
407 
408 /*
409     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
410                       current iterates.
411 
412     Input parameters:
413         nrs - work area of size it + 1.
414         vguess  - index of initial guess
415         vdest - index of result.  Note that vguess may == vdest (replace
416                 guess with the solution).
417         it - HH upper triangular part is a block of size (it+1) x (it+1)
418 
419      This is an internal routine that knows about the PIPEFGMRES internals.
420  */
421 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
422 {
423   PetscScalar    tt;
424   PetscErrorCode ierr;
425   PetscInt       k,j;
426   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
427 
428   PetscFunctionBegin;
429   /* Solve for solution vector that minimizes the residual */
430 
431   if (it < 0) {                                 /* no pipefgmres steps have been performed */
432     ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */
433     PetscFunctionReturn(0);
434   }
435 
436   /* solve the upper triangular system - RS is the right side and HH is
437      the upper triangular matrix  - put soln in nrs */
438   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
439   else nrs[it] = 0.0;
440 
441   for (k=it-1; k>=0; k--) {
442     tt = *RS(k);
443     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
444     nrs[k] = tt / *HH(k,k);
445   }
446 
447   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
448   ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr);
449   ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr);
450 
451   /* add solution to previous solution */
452   if (vdest == vguess) {
453     ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
454   } else {
455     ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr);
456   }
457   PetscFunctionReturn(0);
458 }
459 
460 /*
461 
462     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
463                             Return new residual.
464 
465     input parameters:
466 
467 .        ksp -    Krylov space object
468 .        it  -    plane rotations are applied to the (it+1)th column of the
469                   modified hessenberg (i.e. HH(:,it))
470 .        hapend - PETSC_FALSE not happy breakdown ending.
471 
472     output parameters:
473 .        res - the new residual
474 
475  */
476 /*
477 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
478  */
479 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
480 {
481   PetscScalar    *hh,*cc,*ss,*rs;
482   PetscInt       j;
483   PetscReal      hapbnd;
484   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
485   PetscErrorCode ierr;
486 
487   PetscFunctionBegin;
488   hh = HH(0,it);   /* pointer to beginning of column to update */
489   cc = CC(0);      /* beginning of cosine rotations */
490   ss = SS(0);      /* beginning of sine rotations */
491   rs = RS(0);      /* right hand side of least squares system */
492 
493   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
494   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];
495 
496   /* check for the happy breakdown */
497   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
498   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
499     ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
500     *hapend = PETSC_TRUE;
501   }
502 
503   /* Apply all the previously computed plane rotations to the new column
504      of the Hessenberg matrix */
505   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
506      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
507 
508   for (j=0; j<it; j++) {
509     PetscScalar hhj = hh[j];
510     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
511     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
512   }
513 
514   /*
515     compute the new plane rotation, and apply it to:
516      1) the right-hand-side of the Hessenberg system (RS)
517         note: it affects RS(it) and RS(it+1)
518      2) the new column of the Hessenberg matrix
519         note: it affects HH(it,it) which is currently pointed to
520         by hh and HH(it+1, it) (*(hh+1))
521     thus obtaining the updated value of the residual...
522   */
523 
524   /* compute new plane rotation */
525 
526   if (!*hapend) {
527     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
528     if (delta == 0.0) {
529       ksp->reason = KSP_DIVERGED_NULL;
530       PetscFunctionReturn(0);
531     }
532 
533     cc[it] = hh[it] / delta;    /* new cosine value */
534     ss[it] = hh[it+1] / delta;  /* new sine value */
535 
536     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
537     rs[it+1] = -ss[it]*rs[it];
538     rs[it]   = PetscConj(cc[it])*rs[it];
539     *res     = PetscAbsScalar(rs[it+1]);
540   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
541             another rotation matrix (so RH doesn't change).  The new residual is
542             always the new sine term times the residual from last time (RS(it)),
543             but now the new sine rotation would be zero...so the residual should
544             be zero...so we will multiply "zero" by the last residual.  This might
545             not be exactly what we want to do here -could just return "zero". */
546 
547     *res = 0.0;
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 /*
553    KSPBuildSolution_PIPEFGMRES
554 
555      Input Parameter:
556 .     ksp - the Krylov space object
557 .     ptr-
558 
559    Output Parameter:
560 .     result - the solution
561 
562    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
563    calls directly.
564 
565 */
566 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
567 {
568   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
569   PetscErrorCode ierr;
570 
571   PetscFunctionBegin;
572   if (!ptr) {
573     if (!pipefgmres->sol_temp) {
574       ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr);
575       ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr);
576     }
577     ptr = pipefgmres->sol_temp;
578   }
579   if (!pipefgmres->nrs) {
580     /* allocate the work area */
581     ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr);
582     ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr);
583   }
584 
585   ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr);
586   if (result) *result = ptr;
587   PetscFunctionReturn(0);
588 }
589 
590 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
591 {
592   PetscErrorCode ierr;
593   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
594   PetscBool      flg;
595   PetscScalar    shift;
596 
597   PetscFunctionBegin;
598   ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr);
599   ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr);
600   ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr);
601   if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); }
602   ierr = PetscOptionsTail();CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
607 {
608   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
609   PetscErrorCode ierr;
610   PetscBool      iascii,isstring;
611 
612   PetscFunctionBegin;
613   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
614   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
615 
616   if (iascii) {
617     ierr = PetscViewerASCIIPrintf(viewer,"  restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr);
618     ierr = PetscViewerASCIIPrintf(viewer,"  happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr);
619 #if defined(PETSC_USE_COMPLEX)
620     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
621 #else
622     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
623 #endif
624   } else if (isstring) {
625     ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr);
626 #if defined(PETSC_USE_COMPLEX)
627     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
628 #else
629     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
630 #endif
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
636 {
637   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
638   PetscErrorCode   ierr;
639   PetscInt         i;
640 
641   PetscFunctionBegin;
642   ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr);
643   ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr);
644   for (i=0; i<pipefgmres->nwork_alloc; i++) {
645     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr);
646     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr);
647   }
648   ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr);
649   ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr);
650   ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr);
651   ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*MC
656    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
657 
658    A flexible, 1-stage pipelined variant of GMRES.
659 
660    Options Database Keys:
661 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
662 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
663 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
664 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
665                              vectors are allocated as needed)
666 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
667 
668    Level: intermediate
669 
670    Notes:
671 
672    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
673 
674    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
675 
676    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
677    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
678    See the FAQ on the PETSc website for details.
679 
680    Developer Notes:
681     This class is subclassed off of KSPGMRES.
682 
683    Reference:
684     P. Sanan, S.M. Schnepp, and D.A. May,
685     "Pipelined, Flexible Krylov Subspace Methods,"
686     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
687     DOI: 10.1137/15M1049130
688 
689 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES
690            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift()
691 M*/
692 
693 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
694 {
695   KSP_PIPEFGMRES *pipefgmres;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr);
700 
701   ksp->data                              = (void*)pipefgmres;
702   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
703   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
704   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
705   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
706   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
707   ksp->ops->view                         = KSPView_PIPEFGMRES;
708   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
709   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
710   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
711 
712   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr);
713   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr);
714 
715   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr);
716   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr);
717   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr);
718 
719   pipefgmres->nextra_vecs    = 1;
720   pipefgmres->haptol         = 1.0e-30;
721   pipefgmres->q_preallocate  = 0;
722   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
723   pipefgmres->orthog         = NULL;
724   pipefgmres->nrs            = NULL;
725   pipefgmres->sol_temp       = NULL;
726   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
727   pipefgmres->Rsvd           = NULL;
728   pipefgmres->orthogwork     = NULL;
729   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
730   pipefgmres->shift          = 1.0;
731   PetscFunctionReturn(0);
732 }
733 
734 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
735 {
736   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
737   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
738   PetscInt       nalloc;                            /* number to allocate */
739   PetscErrorCode ierr;
740   PetscInt       k;
741 
742   PetscFunctionBegin;
743   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
744                                       in a single chunk */
745 
746   /* Adjust the number to allocate to make sure that we don't exceed the
747      number of available slots (pipefgmres->vecs_allocated)*/
748   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
749     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
750   }
751   if (!nalloc) PetscFunctionReturn(0);
752 
753   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
754 
755   /* work vectors */
756   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr);
757   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr);
758   for (k=0; k < nalloc; k++) {
759     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
760   }
761   /* specify size of chunk allocated */
762   pipefgmres->mwork_alloc[nwork] = nalloc;
763 
764   /* preconditioned vectors (note we don't use VEC_OFFSET) */
765   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
766   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr);
767   for (k=0; k < nalloc; k++) {
768     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
769   }
770 
771   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
772   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr);
773   for (k=0; k < nalloc; k++) {
774     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
775   }
776 
777   /* increment the number of work vector chunks */
778   pipefgmres->nwork_alloc++;
779   PetscFunctionReturn(0);
780 }
781 
782 /*@
783   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
784 
785   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
786 
787 Logically Collective on ksp
788 
789 Input Parameters:
790 +  ksp - the Krylov space context
791 -  shift - the shift
792 
793 Level: intermediate
794 
795 Options Database:
796 . -ksp_pipefgmres_shift <shift>
797 
798 .seealso: KSPComputeEigenvalues()
799 @*/
800 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
801 {
802   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
803 
804   PetscFunctionBegin;
805   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
806   PetscValidLogicalCollectiveScalar(ksp,shift,2);
807   pipefgmres->shift = shift;
808   PetscFunctionReturn(0);
809 }
810