xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 445ca0904fedcd9c19e8b5d649a2e70cfb1cd372)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>       /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited = PETSC_FALSE;
8 static const char citation[] =
9   "@article{SSM2016,\n"
10   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12   "  journal = {SIAM Journal on Scientific Computing},\n"
13   "  volume = {38},\n"
14   "  number = {5},\n"
15   "  pages = {C441-C470},\n"
16   "  year = {2016},\n"
17   "  doi = {10.1137/15M1049130},\n"
18   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20   "}\n";
21 
22 #define PIPEFGMRES_DELTA_DIRECTIONS 10
23 #define PIPEFGMRES_DEFAULT_MAXK     30
24 
25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
29 
30 /*
31 
32     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
33 
34     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
35     but can be called directly by KSPSetUp().
36 
37 */
38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
39 {
40   PetscErrorCode ierr;
41   PetscInt       k;
42   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
43   const PetscInt max_k = pipefgmres->max_k;
44 
45   PetscFunctionBegin;
46   ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr);
47 
48   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr);
49   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr);
50   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
51 
52   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr);
53   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr);
54   for (k=0; k < pipefgmres->vv_allocated; k++) {
55     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
56   }
57 
58   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr);
59   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr);
60   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
61 
62   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr);
63   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr);
64 
65   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr);
66   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr);
67   for (k=0; k < pipefgmres->vv_allocated; k++) {
68     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
69   }
70 
71   PetscFunctionReturn(0);
72 }
73 
74 /*
75 
76     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
77                   history if requested.
78 
79     input parameters:
80 .        pipefgmres  - structure containing parameters and work areas
81 
82     output parameters:
83 .        itcount - number of iterations used.  If null, ignored.
84 .        converged - 0 if not converged
85 
86     Notes:
87     On entry, the value in vector VEC_VV(0) should be
88     the initial residual.
89 
90 
91  */
92 
93 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
94 {
95   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
96   PetscReal      res_norm;
97   PetscReal      hapbnd,tt;
98   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
99   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
100   PetscErrorCode ierr;
101   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
102   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
103   PetscInt       i,j,k;
104   Mat            Amat,Pmat;
105   Vec            Q,W; /* Pipelining vectors */
106   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
107 
108   PetscFunctionBegin;
109   if (itcount) *itcount = 0;
110 
111   /* Assign simpler names to these vectors, allocated as pipelining workspace */
112   Q = VEC_Q;
113   W = VEC_W;
114 
115   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
116   /* Note that we add an extra value here to allow for a single reduction */
117   if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr);
118   }
119   lhh = pipefgmres->orthogwork;
120 
121   /* Number of pseudo iterations since last restart is the number
122      of prestart directions */
123   loc_it = 0;
124 
125   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
126      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
127      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
128      (loc_it -1) is passed, so the two are equivalent */
129   pipefgmres->it = (loc_it - 1);
130 
131   /* initial residual is in VEC_VV(0)  - compute its norm*/
132   ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
133 
134   /* first entry in right-hand-side of hessenberg system is just
135      the initial residual norm */
136   *RS(0) = res_norm;
137 
138   ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
139   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
140   else ksp->rnorm = 0;
141   ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
142   ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr);
143   ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr);
144 
145   /* check for the convergence - maybe the current guess is good enough */
146   ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
147   if (ksp->reason) {
148     if (itcount) *itcount = 0;
149     PetscFunctionReturn(0);
150   }
151 
152   /* scale VEC_VV (the initial residual) */
153   ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr);
154 
155   /* Fill the pipeline */
156   ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr);
157   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
158   ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr);
159   ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */
160 
161   /* MAIN ITERATION LOOP BEGINNING*/
162   /* keep iterating until we have converged OR generated the max number
163      of directions OR reached the max number of iterations for the method */
164   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
165     if (loc_it) {
166       ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
167       ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
168     }
169     pipefgmres->it = (loc_it - 1);
170 
171     /* see if more space is needed for work vectors */
172     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
173       ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
174       /* (loc_it+1) is passed in as number of the first vector that should
175          be allocated */
176     }
177 
178     /* Note that these inner products are with "Z" now, so
179        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
180        not the value from the equivalent FGMRES run (even in exact arithmetic)
181        That is, the H we need for the Arnoldi relation is different from the
182        coefficients we use in the orthogonalization process,because of the shift */
183 
184     /* Do some local twiddling to allow for a single reduction */
185     for(i=0;i<loc_it+1;i++){
186       redux[i] = VEC_VV(i);
187     }
188     redux[loc_it+1] = ZVEC(loc_it);
189 
190     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
191     ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
192 
193     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
194     ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr);
195 
196     /* The work to be overlapped with the inner products follows.
197        This is application of the preconditioner and the operator
198        to compute intermediate quantites which will be combined (locally)
199        with the results of the inner products.
200        */
201     ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr);
202     ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
203     ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr);
204 
205     /* Compute inner products of the new direction with previous directions,
206        and the norm of the to-be-orthogonalized direction "Z".
207        This information is enough to build the required entries
208        of H. The inner product with VEC_VV(it_loc) is
209        *different* than in the standard FGMRES and need to be dealt with specially.
210        That is, for standard FGMRES the orthogonalization coefficients are the same
211        as the coefficients used in the Arnoldi relation to reconstruct, but here this
212        is not true (albeit only for the one entry of H which we "unshift" below. */
213 
214     /* Finish the dot product, retrieving the extra entry */
215     ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
216     tt = PetscRealPart(lhh[loc_it+1]);
217 
218     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
219       Note that the Hessenberg entries require a shift, as these are for the
220       relation AU = VH, which is wrt unshifted basis vectors */
221     hh = HH(0,loc_it); hes=HES(0,loc_it);
222     for (j=0; j<loc_it; j++) {
223       hh[j]  = lhh[j];
224       hes[j] = lhh[j];
225     }
226     hh[loc_it]  = lhh[loc_it] + shift;
227     hes[loc_it] = lhh[loc_it] + shift;
228 
229     /* we delay applying the shift here */
230     for (j=0; j<=loc_it; j++) {
231       lhh[j]        = -lhh[j]; /* flip sign */
232     }
233 
234     /* Compute the norm of the un-normalized new direction using the rearranged formula
235        Note that these are shifted ("barred") quantities */
236     for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
237     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
238     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ;
239     if (tt < 0.0) {
240       /* If we detect square root breakdown in the norm, we must restart the algorithm.
241          Here this means we simply break the current loop and reconstruct the solution
242          using the basis we have computed thus far. Note that by breaking immediately,
243          we do not update the iteration count, so computation done in this iteration
244          should be disregarded.
245          */
246       ierr = PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);CHKERRQ(ierr);
247       break;
248     } else {
249       tt = PetscSqrtReal(tt);
250     }
251 
252     /* new entry in hessenburg is the 2-norm of our new direction */
253     hh[loc_it+1]  = tt;
254     hes[loc_it+1] = tt;
255 
256     /* The recurred computation for the new direction
257        The division by tt is delayed to the happy breakdown check later
258        Note placement BEFORE the unshift
259        */
260     ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr);
261     ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr);
262     /* (VEC_VV(loc_it+1) is not normalized yet) */
263 
264     /* The recurred computation for the preconditioned vector (u) */
265     ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr);
266     ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr);
267     ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
268 
269     /* Unshift an entry in the GS coefficients ("removing the bar") */
270     lhh[loc_it]         -= shift;
271 
272     /* The recurred computation for z (Au)
273        Note placement AFTER the "unshift" */
274     ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr);
275     ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr);
276     ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
277 
278     /* Happy Breakdown Check */
279     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
280     /* RS(loc_it) contains the res_norm from the last iteration  */
281     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
282     if (tt > hapbnd) {
283       /* scale new direction by its norm  */
284       ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr);
285     } else {
286       /* This happens when the solution is exactly reached. */
287       /* So there is no new direction... */
288       ierr   = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);     /* set VEC_TEMP to 0 */
289       hapend = PETSC_TRUE;
290     }
291     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
292        current solution would not be exact if HES was singular.  Note that
293        HH non-singular implies that HES is not singular, and HES is guaranteed
294        to be nonsingular when PREVECS are linearly independent and A is
295        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
296        of HES). So we should really add a check to verify that HES is nonsingular.*/
297 
298     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
299        here to check that the hessenberg matrix is indeed non-singular (since
300        FGMRES does not guarantee this) */
301 
302     /* Now apply rotations to new col of hessenberg (and right side of system),
303        calculate new rotation, and get new residual norm at the same time*/
304     ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr);
305     if (ksp->reason) break;
306 
307     loc_it++;
308     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */
309 
310     ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
311     ksp->its++;
312     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
313     else ksp->rnorm = 0;
314     ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
315 
316     ierr = (*ksp->converged)(ksp,ksp->its,ksp->rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
317 
318     /* Catch error in happy breakdown and signal convergence and break from loop */
319     if (hapend) {
320       if (!ksp->reason) {
321         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
322         else {
323           ksp->reason = KSP_DIVERGED_BREAKDOWN;
324           break;
325         }
326       }
327     }
328   }
329   /* END OF ITERATION LOOP */
330   ierr = KSPLogResidualHistory(ksp,ksp->rnorm);CHKERRQ(ierr);
331 
332   /*
333      Monitor if we know that we will not return for a restart */
334   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
335     ierr = KSPMonitor(ksp,ksp->its,ksp->rnorm);CHKERRQ(ierr);
336   }
337 
338   if (itcount) *itcount = loc_it;
339 
340   /*
341     Down here we have to solve for the "best" coefficients of the Krylov
342     columns, add the solution values together, and possibly unwind the
343     preconditioning from the solution
344    */
345 
346   /* Form the solution (or the solution so far) */
347   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
348      properly navigates */
349 
350   ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);
351 
352   PetscFunctionReturn(0);
353 }
354 
355 /*
356     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
357 
358 
359    Input Parameter:
360 .     ksp - the Krylov space object that was set to use pipefgmres
361 
362    Output Parameter:
363 .     outits - number of iterations used
364 
365 */
366 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
367 {
368   PetscErrorCode ierr;
369   PetscInt       its,itcount;
370   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
371   PetscBool      guess_zero = ksp->guess_zero;
372 
373   PetscFunctionBegin;
374 
375   /* We have not checked these routines for use with complex numbers. The inner products
376      are likely not defined correctly for that case */
377 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
378   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
379 #endif
380 
381   ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr);
382 
383   if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
384   ierr     = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
385   ksp->its = 0;
386   ierr     = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
387 
388   itcount     = 0;
389   ksp->reason = KSP_CONVERGED_ITERATING;
390   while (!ksp->reason) {
391     ierr     = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr);
392     ierr     = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr);
393     itcount += its;
394     if (itcount >= ksp->max_it) {
395       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
396       break;
397     }
398     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
399   }
400   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
401   PetscFunctionReturn(0);
402 }
403 
404 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
405 {
406   PetscErrorCode ierr;
407 
408   PetscFunctionBegin;
409   ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr);
410   ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr);
411   PetscFunctionReturn(0);
412 }
413 
414 /*
415     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
416                       current iterates.
417 
418     Input parameters:
419         nrs - work area of size it + 1.
420         vguess  - index of initial guess
421         vdest - index of result.  Note that vguess may == vdest (replace
422                 guess with the solution).
423         it - HH upper triangular part is a block of size (it+1) x (it+1)
424 
425      This is an internal routine that knows about the PIPEFGMRES internals.
426  */
427 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
428 {
429   PetscScalar    tt;
430   PetscErrorCode ierr;
431   PetscInt       k,j;
432   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
433 
434   PetscFunctionBegin;
435   /* Solve for solution vector that minimizes the residual */
436 
437   if (it < 0) {                                 /* no pipefgmres steps have been performed */
438     ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */
439     PetscFunctionReturn(0);
440   }
441 
442   /* solve the upper triangular system - RS is the right side and HH is
443      the upper triangular matrix  - put soln in nrs */
444   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
445   else nrs[it] = 0.0;
446 
447   for (k=it-1; k>=0; k--) {
448     tt = *RS(k);
449     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
450     nrs[k] = tt / *HH(k,k);
451   }
452 
453   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
454   ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr);
455   ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr);
456 
457   /* add solution to previous solution */
458   if (vdest == vguess) {
459     ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
460   } else {
461     ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr);
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 /*
467 
468     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
469                             Return new residual.
470 
471     input parameters:
472 
473 .        ksp -    Krylov space object
474 .        it  -    plane rotations are applied to the (it+1)th column of the
475                   modified hessenberg (i.e. HH(:,it))
476 .        hapend - PETSC_FALSE not happy breakdown ending.
477 
478     output parameters:
479 .        res - the new residual
480 
481  */
482 /*
483 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
484  */
485 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
486 {
487   PetscScalar    *hh,*cc,*ss,*rs;
488   PetscInt       j;
489   PetscReal      hapbnd;
490   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
491   PetscErrorCode ierr;
492 
493   PetscFunctionBegin;
494   hh = HH(0,it);   /* pointer to beginning of column to update */
495   cc = CC(0);      /* beginning of cosine rotations */
496   ss = SS(0);      /* beginning of sine rotations */
497   rs = RS(0);      /* right hand side of least squares system */
498 
499   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
500   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];
501 
502   /* check for the happy breakdown */
503   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
504   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
505     ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
506     *hapend = PETSC_TRUE;
507   }
508 
509   /* Apply all the previously computed plane rotations to the new column
510      of the Hessenberg matrix */
511   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
512      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
513 
514   for (j=0; j<it; j++) {
515     PetscScalar hhj = hh[j];
516     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
517     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
518   }
519 
520   /*
521     compute the new plane rotation, and apply it to:
522      1) the right-hand-side of the Hessenberg system (RS)
523         note: it affects RS(it) and RS(it+1)
524      2) the new column of the Hessenberg matrix
525         note: it affects HH(it,it) which is currently pointed to
526         by hh and HH(it+1, it) (*(hh+1))
527     thus obtaining the updated value of the residual...
528   */
529 
530   /* compute new plane rotation */
531 
532   if (!*hapend) {
533     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
534     if (delta == 0.0) {
535       ksp->reason = KSP_DIVERGED_NULL;
536       PetscFunctionReturn(0);
537     }
538 
539     cc[it] = hh[it] / delta;    /* new cosine value */
540     ss[it] = hh[it+1] / delta;  /* new sine value */
541 
542     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
543     rs[it+1] = -ss[it]*rs[it];
544     rs[it]   = PetscConj(cc[it])*rs[it];
545     *res     = PetscAbsScalar(rs[it+1]);
546   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
547             another rotation matrix (so RH doesn't change).  The new residual is
548             always the new sine term times the residual from last time (RS(it)),
549             but now the new sine rotation would be zero...so the residual should
550             be zero...so we will multiply "zero" by the last residual.  This might
551             not be exactly what we want to do here -could just return "zero". */
552 
553     *res = 0.0;
554   }
555   PetscFunctionReturn(0);
556 }
557 
558 /*
559    KSPBuildSolution_PIPEFGMRES
560 
561      Input Parameter:
562 .     ksp - the Krylov space object
563 .     ptr-
564 
565    Output Parameter:
566 .     result - the solution
567 
568    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
569    calls directly.
570 
571 */
572 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
573 {
574   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
575   PetscErrorCode ierr;
576 
577   PetscFunctionBegin;
578   if (!ptr) {
579     if (!pipefgmres->sol_temp) {
580       ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr);
581       ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr);
582     }
583     ptr = pipefgmres->sol_temp;
584   }
585   if (!pipefgmres->nrs) {
586     /* allocate the work area */
587     ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr);
588     ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr);
589   }
590 
591   ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr);
592   if (result) *result = ptr;
593   PetscFunctionReturn(0);
594 }
595 
596 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
597 {
598   PetscErrorCode ierr;
599   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
600   PetscBool      flg;
601   PetscScalar    shift;
602 
603   PetscFunctionBegin;
604   ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr);
605   ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr);
606   ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr);
607   if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); }
608   ierr = PetscOptionsTail();CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
613 {
614   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
615   PetscErrorCode ierr;
616   PetscBool      iascii,isstring;
617 
618   PetscFunctionBegin;
619   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
620   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
621 
622   if (iascii) {
623     ierr = PetscViewerASCIIPrintf(viewer,"  restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr);
624     ierr = PetscViewerASCIIPrintf(viewer,"  happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr);
625 #if defined(PETSC_USE_COMPLEX)
626     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
627 #else
628     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
629 #endif
630   } else if (isstring) {
631     ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr);
632 #if defined(PETSC_USE_COMPLEX)
633     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
634 #else
635     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
636 #endif
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
642 {
643   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
644   PetscErrorCode   ierr;
645   PetscInt         i;
646 
647   PetscFunctionBegin;
648   ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr);
649   ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr);
650   for (i=0; i<pipefgmres->nwork_alloc; i++) {
651     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr);
652     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr);
653   }
654   ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr);
655   ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr);
656   ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr);
657   ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr);
658   PetscFunctionReturn(0);
659 }
660 
661 /*MC
662    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
663 
664    A flexible, 1-stage pipelined variant of GMRES.
665 
666    Options Database Keys:
667 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
668 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
669 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
670 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
671                              vectors are allocated as needed)
672 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
673 
674 
675    Level: intermediate
676 
677    Notes:
678 
679    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
680 
681    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
682 
683    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
684    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
685    See the FAQ on the PETSc website for details.
686 
687    Developer Notes:
688     This class is subclassed off of KSPGMRES.
689 
690    Reference:
691     P. Sanan, S.M. Schnepp, and D.A. May,
692     "Pipelined, Flexible Krylov Subspace Methods,"
693     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
694     DOI: 10.1137/15M1049130
695 
696 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES
697            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift()
698 M*/
699 
700 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
701 {
702   KSP_PIPEFGMRES *pipefgmres;
703   PetscErrorCode ierr;
704 
705   PetscFunctionBegin;
706   ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr);
707 
708   ksp->data                              = (void*)pipefgmres;
709   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
710   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
711   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
712   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
713   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
714   ksp->ops->view                         = KSPView_PIPEFGMRES;
715   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
716   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
717   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
718 
719   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr);
720   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);CHKERRQ(ierr);
721 
722   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr);
723   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr);
724   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr);
725 
726   pipefgmres->nextra_vecs    = 1;
727   pipefgmres->haptol         = 1.0e-30;
728   pipefgmres->q_preallocate  = 0;
729   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
730   pipefgmres->orthog         = NULL;
731   pipefgmres->nrs            = NULL;
732   pipefgmres->sol_temp       = NULL;
733   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
734   pipefgmres->Rsvd           = NULL;
735   pipefgmres->orthogwork     = NULL;
736   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
737   pipefgmres->shift          = 1.0;
738   PetscFunctionReturn(0);
739 }
740 
741 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
742 {
743   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
744   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
745   PetscInt       nalloc;                            /* number to allocate */
746   PetscErrorCode ierr;
747   PetscInt       k;
748 
749   PetscFunctionBegin;
750   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
751                                       in a single chunk */
752 
753   /* Adjust the number to allocate to make sure that we don't exceed the
754      number of available slots (pipefgmres->vecs_allocated)*/
755   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
756     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
757   }
758   if (!nalloc) PetscFunctionReturn(0);
759 
760   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
761 
762   /* work vectors */
763   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr);
764   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr);
765   for (k=0; k < nalloc; k++) {
766     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
767   }
768   /* specify size of chunk allocated */
769   pipefgmres->mwork_alloc[nwork] = nalloc;
770 
771   /* preconditioned vectors (note we don't use VEC_OFFSET) */
772   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
773   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr);
774   for (k=0; k < nalloc; k++) {
775     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
776   }
777 
778   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
779   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr);
780   for (k=0; k < nalloc; k++) {
781     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
782   }
783 
784   /* increment the number of work vector chunks */
785   pipefgmres->nwork_alloc++;
786   PetscFunctionReturn(0);
787 }
788 /*@
789   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
790 
791   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
792 
793 Logically Collective on ksp
794 
795 Input Parameters:
796 +  ksp - the Krylov space context
797 -  shift - the shift
798 
799 Level: intermediate
800 
801 Options Database:
802 . -ksp_pipefgmres_shift <shift>
803 
804 .seealso: KSPComputeEigenvalues()
805 @*/
806 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
807 {
808   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
809 
810   PetscFunctionBegin;
811   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
812   PetscValidLogicalCollectiveScalar(ksp,shift,2);
813   pipefgmres->shift = shift;
814   PetscFunctionReturn(0);
815 }
816