xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 8292e18f74b084e7dd6e8162076fc4cae0c1ceeb)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>       /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited = PETSC_FALSE;
8 static const char citation[] =
9   "@article{SSM2016,\n"
10   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12   "  journal = {SIAM Journal on Scientific Computing},\n"
13   "  volume = {38},\n"
14   "  number = {5},\n"
15   "  pages = {C441-C470},\n"
16   "  year = {2016},\n"
17   "  doi = {10.1137/15M1049130},\n"
18   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20   "}\n";
21 
22 #define PIPEFGMRES_DELTA_DIRECTIONS 10
23 #define PIPEFGMRES_DEFAULT_MAXK     30
24 
25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
29 
30 /*
31 
32     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
33 
34     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
35     but can be called directly by KSPSetUp().
36 
37 */
38 #undef __FUNCT__
39 #define __FUNCT__ "KSPSetUp_PIPEFGMRES"
40 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
41 {
42   PetscErrorCode ierr;
43   PetscInt       k;
44   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
45   const PetscInt max_k = pipefgmres->max_k;
46 
47   PetscFunctionBegin;
48   ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr);
49 
50   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr);
51   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr);
52   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
53 
54   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr);
55   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr);
56   for (k=0; k < pipefgmres->vv_allocated; k++) {
57     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
58   }
59 
60   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr);
61   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr);
62   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
63 
64   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr);
65   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr);
66 
67   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr);
68   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr);
69   for (k=0; k < pipefgmres->vv_allocated; k++) {
70     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
71   }
72 
73   PetscFunctionReturn(0);
74 }
75 
76 /*
77 
78     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
79                   history if requested.
80 
81     input parameters:
82 .        pipefgmres  - structure containing parameters and work areas
83 
84     output parameters:
85 .        itcount - number of iterations used.  If null, ignored.
86 .        converged - 0 if not converged
87 
88     Notes:
89     On entry, the value in vector VEC_VV(0) should be
90     the initial residual.
91 
92 
93  */
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "KSPPIPEFGMRESCycle"
97 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
98 {
99   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
100   PetscReal      res_norm;
101   PetscReal      hapbnd,tt;
102   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
103   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
104   PetscErrorCode ierr;
105   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
106   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
107   PetscInt       i,j,k;
108   Mat            Amat,Pmat;
109   Vec            Q,W; /* Pipelining vectors */
110   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
111 
112   PetscFunctionBegin;
113 
114   /* We have not checked these routines for use with complex numbers. The inner products
115      are likely not defined correctly for that case */
116 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
117   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
118 #endif
119 
120   if (itcount) *itcount = 0;
121 
122   /* Assign simpler names to these vectors, allocated as pipelining workspace */
123   Q = VEC_Q;
124   W = VEC_W;
125 
126   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
127   /* Note that we add an extra value here to allow for a single reduction */
128   if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr);
129   }
130   lhh = pipefgmres->orthogwork;
131 
132   /* Number of pseudo iterations since last restart is the number
133      of prestart directions */
134   loc_it = 0;
135 
136   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
137      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
138      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
139      (loc_it -1) is passed, so the two are equivalent */
140   pipefgmres->it = (loc_it - 1);
141 
142   /* initial residual is in VEC_VV(0)  - compute its norm*/
143   ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
144 
145   /* first entry in right-hand-side of hessenberg system is just
146      the initial residual norm */
147   *RS(0) = res_norm;
148 
149   ksp->rnorm = res_norm;
150   ierr       = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
151   ierr       = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
152 
153   /* check for the convergence - maybe the current guess is good enough */
154   ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
155   if (ksp->reason) {
156     if (itcount) *itcount = 0;
157     PetscFunctionReturn(0);
158   }
159 
160   /* scale VEC_VV (the initial residual) */
161   ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr);
162 
163   /* Fill the pipeline */
164   ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr);
165   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
166   ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr);
167   ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */
168 
169   /* MAIN ITERATION LOOP BEGINNING*/
170   /* keep iterating until we have converged OR generated the max number
171      of directions OR reached the max number of iterations for the method */
172   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
173     if (loc_it) {
174       ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
175       ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
176     }
177     pipefgmres->it = (loc_it - 1);
178 
179     /* see if more space is needed for work vectors */
180     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
181       ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
182       /* (loc_it+1) is passed in as number of the first vector that should
183          be allocated */
184     }
185 
186     /* Note that these inner products are with "Z" now, so
187        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
188        not the value from the equivalent FGMRES run (even in exact arithmetic)
189        That is, the H we need for the Arnoldi relation is different from the
190        coefficients we use in the orthogonalization process,because of the shift */
191 
192     /* Do some local twiddling to allow for a single reduction */
193     for(i=0;i<loc_it+1;i++){
194       redux[i] = VEC_VV(i);
195     }
196     redux[loc_it+1] = ZVEC(loc_it);
197 
198     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
199     ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
200 
201     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
202     ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr);
203 
204     /* The work to be overlapped with the inner products follows.
205        This is application of the preconditioner and the operator
206        to compute intermediate quantites which will be combined (locally)
207        with the results of the inner products.
208        */
209     ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr);
210     ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
211     ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr);
212 
213     /* Compute inner products of the new direction with previous directions,
214        and the norm of the to-be-orthogonalized direction "Z".
215        This information is enough to build the required entries
216        of H. The inner product with VEC_VV(it_loc) is
217        *different* than in the standard FGMRES and need to be dealt with specially.
218        That is, for standard FGMRES the orthogonalization coefficients are the same
219        as the coefficients used in the Arnoldi relation to reconstruct, but here this
220        is not true (albeit only for the one entry of H which we "unshift" below. */
221 
222     /* Finish the dot product, retrieving the extra entry */
223     ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
224     tt = PetscRealPart(lhh[loc_it+1]);
225 
226     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
227       Note that the Hessenberg entries require a shift, as these are for the
228       relation AU = VH, which is wrt unshifted basis vectors */
229     hh = HH(0,loc_it); hes=HES(0,loc_it);
230     for (j=0; j<loc_it; j++) {
231       hh[j]  = lhh[j];
232       hes[j] = lhh[j];
233     }
234     hh[loc_it]  = lhh[loc_it] + shift;
235     hes[loc_it] = lhh[loc_it] + shift;
236 
237     /* we delay applying the shift here */
238     for (j=0; j<=loc_it; j++) {
239       lhh[j]        = -lhh[j]; /* flip sign */
240     }
241 
242     /* Compute the norm of the un-normalized new direction using the rearranged formula
243        Note that these are shifted ("barred") quantities */
244     for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
245     if (tt < 0.0) {
246       /* If we detect square root breakdown in the norm, we must restart the algorithm.
247          Here this means we simply break the current loop and reconstruct the solution
248          using the basis we have computed thus far. Note that by breaking immediately,
249          we do not update the iteration count, so computation done in this iteration
250          should be disregarded.
251          */
252       ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr);
253       break;
254     } else {
255       tt = PetscSqrtReal(tt);
256     }
257 
258     /* new entry in hessenburg is the 2-norm of our new direction */
259     hh[loc_it+1]  = tt;
260     hes[loc_it+1] = tt;
261 
262     /* The recurred computation for the new direction
263        The division by tt is delayed to the happy breakdown check later
264        Note placement BEFORE the unshift
265        */
266     ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr);
267     ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr);
268     /* (VEC_VV(loc_it+1) is not normalized yet) */
269 
270     /* The recurred computation for the preconditioned vector (u) */
271     ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr);
272     ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr);
273     ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
274 
275     /* Unshift an entry in the GS coefficients ("removing the bar") */
276     lhh[loc_it]         -= shift;
277 
278     /* The recurred computation for z (Au)
279        Note placement AFTER the "unshift" */
280     ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr);
281     ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr);
282     ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
283 
284     /* Happy Breakdown Check */
285     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
286     /* RS(loc_it) contains the res_norm from the last iteration  */
287     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
288     if (tt > hapbnd) {
289       /* scale new direction by its norm  */
290       ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr);
291     } else {
292       /* This happens when the solution is exactly reached. */
293       /* So there is no new direction... */
294       ierr   = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);     /* set VEC_TEMP to 0 */
295       hapend = PETSC_TRUE;
296     }
297     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
298        current solution would not be exact if HES was singular.  Note that
299        HH non-singular implies that HES is not singular, and HES is guaranteed
300        to be nonsingular when PREVECS are linearly independent and A is
301        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
302        of HES). So we should really add a check to verify that HES is nonsingular.*/
303 
304     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
305        here to check that the hessenberg matrix is indeed non-singular (since
306        FGMRES does not guarantee this) */
307 
308     /* Now apply rotations to new col of hessenberg (and right side of system),
309        calculate new rotation, and get new residual norm at the same time*/
310     ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr);
311     if (ksp->reason) break;
312 
313     loc_it++;
314     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */
315 
316     ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
317     ksp->its++;
318     ksp->rnorm = res_norm;
319     ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
320 
321     ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
322 
323     /* Catch error in happy breakdown and signal convergence and break from loop */
324     if (hapend) {
325       if (!ksp->reason) {
326         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
327         else {
328           ksp->reason = KSP_DIVERGED_BREAKDOWN;
329           break;
330         }
331       }
332     }
333   }
334   /* END OF ITERATION LOOP */
335   ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
336 
337   /*
338      Monitor if we know that we will not return for a restart */
339   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
340     ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
341   }
342 
343   if (itcount) *itcount = loc_it;
344 
345   /*
346     Down here we have to solve for the "best" coefficients of the Krylov
347     columns, add the solution values together, and possibly unwind the
348     preconditioning from the solution
349    */
350 
351   /* Form the solution (or the solution so far) */
352   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
353      properly navigates */
354 
355   ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);
356 
357   PetscFunctionReturn(0);
358 }
359 
360 /*
361     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
362 
363 
364    Input Parameter:
365 .     ksp - the Krylov space object that was set to use pipefgmres
366 
367    Output Parameter:
368 .     outits - number of iterations used
369 
370 */
371 #undef __FUNCT__
372 #define __FUNCT__ "KSPSolve_PIPEFGMRES"
373 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
374 {
375   PetscErrorCode ierr;
376   PetscInt       its,itcount;
377   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
378   PetscBool      guess_zero = ksp->guess_zero;
379 
380   PetscFunctionBegin;
381   ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr);
382 
383   if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
384   ierr     = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
385   ksp->its = 0;
386   ierr     = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
387 
388   itcount     = 0;
389   ksp->reason = KSP_CONVERGED_ITERATING;
390   while (!ksp->reason) {
391     ierr     = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr);
392     ierr     = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr);
393     itcount += its;
394     if (itcount >= ksp->max_it) {
395       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
396       break;
397     }
398     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
399   }
400   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
401   PetscFunctionReturn(0);
402 }
403 
404 #undef __FUNCT__
405 #define __FUNCT__ "KSPDestroy_PIPEFGMRES"
406 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
407 {
408   PetscErrorCode ierr;
409 
410   PetscFunctionBegin;
411   ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr);
412   ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr);
413   PetscFunctionReturn(0);
414 }
415 
416 /*
417     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
418                       current iterates.
419 
420     Input parameters:
421         nrs - work area of size it + 1.
422         vguess  - index of initial guess
423         vdest - index of result.  Note that vguess may == vdest (replace
424                 guess with the solution).
425         it - HH upper triangular part is a block of size (it+1) x (it+1)
426 
427      This is an internal routine that knows about the PIPEFGMRES internals.
428  */
429 #undef __FUNCT__
430 #define __FUNCT__ "KSPPIPEFGMRESBuildSoln"
431 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
432 {
433   PetscScalar    tt;
434   PetscErrorCode ierr;
435   PetscInt       k,j;
436   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
437 
438   PetscFunctionBegin;
439   /* Solve for solution vector that minimizes the residual */
440 
441   if (it < 0) {                                 /* no pipefgmres steps have been performed */
442     ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */
443     PetscFunctionReturn(0);
444   }
445 
446   /* solve the upper triangular system - RS is the right side and HH is
447      the upper triangular matrix  - put soln in nrs */
448   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
449   else nrs[it] = 0.0;
450 
451   for (k=it-1; k>=0; k--) {
452     tt = *RS(k);
453     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
454     nrs[k] = tt / *HH(k,k);
455   }
456 
457   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
458   ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr);
459   ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr);
460 
461   /* add solution to previous solution */
462   if (vdest == vguess) {
463     ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
464   } else {
465     ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr);
466   }
467   PetscFunctionReturn(0);
468 }
469 
470 /*
471 
472     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
473                             Return new residual.
474 
475     input parameters:
476 
477 .        ksp -    Krylov space object
478 .        it  -    plane rotations are applied to the (it+1)th column of the
479                   modified hessenberg (i.e. HH(:,it))
480 .        hapend - PETSC_FALSE not happy breakdown ending.
481 
482     output parameters:
483 .        res - the new residual
484 
485  */
486 #undef __FUNCT__
487 #define __FUNCT__ "KSPPIPEFGMRESUpdateHessenberg"
488 /*
489 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
490  */
491 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
492 {
493   PetscScalar    *hh,*cc,*ss,*rs;
494   PetscInt       j;
495   PetscReal      hapbnd;
496   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
497   PetscErrorCode ierr;
498 
499   PetscFunctionBegin;
500   hh = HH(0,it);   /* pointer to beginning of column to update */
501   cc = CC(0);      /* beginning of cosine rotations */
502   ss = SS(0);      /* beginning of sine rotations */
503   rs = RS(0);      /* right hand side of least squares system */
504 
505   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
506   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];
507 
508   /* check for the happy breakdown */
509   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
510   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
511     ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
512     *hapend = PETSC_TRUE;
513   }
514 
515   /* Apply all the previously computed plane rotations to the new column
516      of the Hessenberg matrix */
517   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
518      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
519 
520   for (j=0; j<it; j++) {
521     PetscScalar hhj = hh[j];
522     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
523     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
524   }
525 
526   /*
527     compute the new plane rotation, and apply it to:
528      1) the right-hand-side of the Hessenberg system (RS)
529         note: it affects RS(it) and RS(it+1)
530      2) the new column of the Hessenberg matrix
531         note: it affects HH(it,it) which is currently pointed to
532         by hh and HH(it+1, it) (*(hh+1))
533     thus obtaining the updated value of the residual...
534   */
535 
536   /* compute new plane rotation */
537 
538   if (!*hapend) {
539     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
540     if (delta == 0.0) {
541       ksp->reason = KSP_DIVERGED_NULL;
542       PetscFunctionReturn(0);
543     }
544 
545     cc[it] = hh[it] / delta;    /* new cosine value */
546     ss[it] = hh[it+1] / delta;  /* new sine value */
547 
548     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
549     rs[it+1] = -ss[it]*rs[it];
550     rs[it]   = PetscConj(cc[it])*rs[it];
551     *res     = PetscAbsScalar(rs[it+1]);
552   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
553             another rotation matrix (so RH doesn't change).  The new residual is
554             always the new sine term times the residual from last time (RS(it)),
555             but now the new sine rotation would be zero...so the residual should
556             be zero...so we will multiply "zero" by the last residual.  This might
557             not be exactly what we want to do here -could just return "zero". */
558 
559     *res = 0.0;
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 /*
565    KSPBuildSolution_PIPEFGMRES
566 
567      Input Parameter:
568 .     ksp - the Krylov space object
569 .     ptr-
570 
571    Output Parameter:
572 .     result - the solution
573 
574    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
575    calls directly.
576 
577 */
578 #undef __FUNCT__
579 #define __FUNCT__ "KSPBuildSolution_PIPEFGMRES"
580 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
581 {
582   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
583   PetscErrorCode ierr;
584 
585   PetscFunctionBegin;
586   if (!ptr) {
587     if (!pipefgmres->sol_temp) {
588       ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr);
589       ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr);
590     }
591     ptr = pipefgmres->sol_temp;
592   }
593   if (!pipefgmres->nrs) {
594     /* allocate the work area */
595     ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr);
596     ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr);
597   }
598 
599   ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr);
600   if (result) *result = ptr;
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "KSPSetFromOptions_PIPEFGMRES"
606 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
607 {
608   PetscErrorCode ierr;
609   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
610   PetscBool      flg;
611   PetscScalar    shift;
612 
613   PetscFunctionBegin;
614   ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr);
615   ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr);
616   ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr);
617   if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); }
618   ierr = PetscOptionsTail();CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "KSPView_PIPEFGMRES"
624 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
625 {
626   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
627   PetscErrorCode ierr;
628   PetscBool      iascii,isstring;
629 
630   PetscFunctionBegin;
631   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
632   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
633 
634   if (iascii) {
635     ierr = PetscViewerASCIIPrintf(viewer,"  GMRES: restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr);
636     ierr = PetscViewerASCIIPrintf(viewer,"  GMRES: happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr);
637 #if defined(PETSC_USE_COMPLEX)
638     ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
639 #else
640     ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
641 #endif
642   } else if (isstring) {
643     ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr);
644 #if defined(PETSC_USE_COMPLEX)
645     ierr = PetscViewerStringSPrintf(viewer,"   PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
646 #else
647     ierr = PetscViewerStringSPrintf(viewer,"   PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
648 #endif
649   }
650   PetscFunctionReturn(0);
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "KSPReset_PIPEFGMRES"
655 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
656 {
657   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
658   PetscErrorCode   ierr;
659   PetscInt         i;
660 
661   PetscFunctionBegin;
662   ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr);
663   ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr);
664   for (i=0; i<pipefgmres->nwork_alloc; i++) {
665     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr);
666     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr);
667   }
668   ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr);
669   ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr);
670   ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr);
671   ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 /*MC
676    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
677 
678    A flexible, 1-stage pipelined variant of GMRES.
679 
680    Options Database Keys:
681 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
682 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
683 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
684 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
685                              vectors are allocated as needed)
686 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
687 
688 
689    Level: intermediate
690 
691    Notes:
692 
693    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
694 
695    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
696 
697    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
698    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
699    See the FAQ on the PETSc website for details.
700 
701    Developer Notes: This class is subclassed off of KSPGMRES.
702 
703    Reference:
704     P. Sanan, S.M. Schnepp, and D.A. May,
705     "Pipelined, Flexible Krylov Subspace Methods,"
706     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
707     DOI: 10.1137/15M1049130
708 
709 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES
710            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift()
711 M*/
712 
713 #undef __FUNCT__
714 #define __FUNCT__ "KSPCreate_PIPEFGMRES"
715 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
716 {
717   KSP_PIPEFGMRES *pipefgmres;
718   PetscErrorCode ierr;
719 
720   PetscFunctionBegin;
721   ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr);
722 
723   ksp->data                              = (void*)pipefgmres;
724   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
725   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
726   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
727   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
728   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
729   ksp->ops->view                         = KSPView_PIPEFGMRES;
730   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
731   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
732   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
733 
734   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr);
735 
736   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr);
737   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr);
738   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr);
739 
740   pipefgmres->nextra_vecs    = 1;
741   pipefgmres->haptol         = 1.0e-30;
742   pipefgmres->q_preallocate  = 0;
743   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
744   pipefgmres->orthog         = 0;
745   pipefgmres->nrs            = 0;
746   pipefgmres->sol_temp       = 0;
747   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
748   pipefgmres->Rsvd           = 0;
749   pipefgmres->orthogwork     = 0;
750   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
751   pipefgmres->shift          = 1.0;
752   PetscFunctionReturn(0);
753 }
754 
755 #undef __FUNCT__
756 #define __FUNCT__ "KSPPIPEFGMRESGetNewVectors"
757 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
758 {
759   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
760   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
761   PetscInt       nalloc;                            /* number to allocate */
762   PetscErrorCode ierr;
763   PetscInt       k;
764 
765   PetscFunctionBegin;
766   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
767                                       in a single chunk */
768 
769   /* Adjust the number to allocate to make sure that we don't exceed the
770      number of available slots (pipefgmres->vecs_allocated)*/
771   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
772     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
773   }
774   if (!nalloc) PetscFunctionReturn(0);
775 
776   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
777 
778   /* work vectors */
779   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr);
780   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr);
781   for (k=0; k < nalloc; k++) {
782     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
783   }
784   /* specify size of chunk allocated */
785   pipefgmres->mwork_alloc[nwork] = nalloc;
786 
787   /* preconditioned vectors (note we don't use VEC_OFFSET) */
788   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
789   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr);
790   for (k=0; k < nalloc; k++) {
791     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
792   }
793 
794   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
795   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr);
796   for (k=0; k < nalloc; k++) {
797     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
798   }
799 
800   /* increment the number of work vector chunks */
801   pipefgmres->nwork_alloc++;
802   PetscFunctionReturn(0);
803 }
804 #undef __FUNCT__
805 #define __FUNCT__ "KSPPIPEFGMRESSetShift"
806 /*@
807   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
808 
809   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
810 
811 Logically Collective on KSP
812 
813 Input Parameters:
814 +  ksp - the Krylov space context
815 -  shift - the shift
816 
817 Level: intermediate
818 
819 Options Database:
820 . -ksp_pipefgmres_shift <shift>
821 
822 .seealso: KSPComputeEigenvalues()
823 @*/
824 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
825 {
826   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
827 
828   PetscFunctionBegin;
829   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
830   PetscValidLogicalCollectiveScalar(ksp,shift,2);
831   pipefgmres->shift = shift;
832   PetscFunctionReturn(0);
833 }
834