xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 864c62dfd577300df05c8a4e4992489f187e8c79)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>       /*I  "petscksp.h"  I*/
6 
7 static PetscBool  cited = PETSC_FALSE;
8 static const char citation[] =
9   "@article{SSM2016,\n"
10   "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
11   "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
12   "  journal = {SIAM Journal on Scientific Computing},\n"
13   "  volume = {38},\n"
14   "  number = {5},\n"
15   "  pages = {C441-C470},\n"
16   "  year = {2016},\n"
17   "  doi = {10.1137/15M1049130},\n"
18   "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
19   "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
20   "}\n";
21 
22 #define PIPEFGMRES_DELTA_DIRECTIONS 10
23 #define PIPEFGMRES_DEFAULT_MAXK     30
24 
25 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
26 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
27 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
28 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
29 
30 /*
31 
32     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
33 
34     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
35     but can be called directly by KSPSetUp().
36 
37 */
38 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
39 {
40   PetscErrorCode ierr;
41   PetscInt       k;
42   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
43   const PetscInt max_k = pipefgmres->max_k;
44 
45   PetscFunctionBegin;
46   ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr);
47 
48   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr);
49   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr);
50   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
51 
52   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr);
53   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr);
54   for (k=0; k < pipefgmres->vv_allocated; k++) {
55     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
56   }
57 
58   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr);
59   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr);
60   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
61 
62   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr);
63   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr);
64 
65   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr);
66   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr);
67   for (k=0; k < pipefgmres->vv_allocated; k++) {
68     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
69   }
70 
71   PetscFunctionReturn(0);
72 }
73 
74 /*
75 
76     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
77                   history if requested.
78 
79     input parameters:
80 .        pipefgmres  - structure containing parameters and work areas
81 
82     output parameters:
83 .        itcount - number of iterations used.  If null, ignored.
84 .        converged - 0 if not converged
85 
86     Notes:
87     On entry, the value in vector VEC_VV(0) should be
88     the initial residual.
89 
90 
91  */
92 
93 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
94 {
95   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
96   PetscReal      res_norm;
97   PetscReal      hapbnd,tt;
98   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
99   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
100   PetscErrorCode ierr;
101   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
102   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
103   PetscInt       i,j,k;
104   Mat            Amat,Pmat;
105   Vec            Q,W; /* Pipelining vectors */
106   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
107 
108   PetscFunctionBegin;
109   if (itcount) *itcount = 0;
110 
111   /* Assign simpler names to these vectors, allocated as pipelining workspace */
112   Q = VEC_Q;
113   W = VEC_W;
114 
115   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
116   /* Note that we add an extra value here to allow for a single reduction */
117   if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr);
118   }
119   lhh = pipefgmres->orthogwork;
120 
121   /* Number of pseudo iterations since last restart is the number
122      of prestart directions */
123   loc_it = 0;
124 
125   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
126      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
127      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
128      (loc_it -1) is passed, so the two are equivalent */
129   pipefgmres->it = (loc_it - 1);
130 
131   /* initial residual is in VEC_VV(0)  - compute its norm*/
132   ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
133 
134   /* first entry in right-hand-side of hessenberg system is just
135      the initial residual norm */
136   *RS(0) = res_norm;
137 
138   ksp->rnorm = res_norm;
139   ierr       = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
140   ierr       = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
141 
142   /* check for the convergence - maybe the current guess is good enough */
143   ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
144   if (ksp->reason) {
145     if (itcount) *itcount = 0;
146     PetscFunctionReturn(0);
147   }
148 
149   /* scale VEC_VV (the initial residual) */
150   ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr);
151 
152   /* Fill the pipeline */
153   ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr);
154   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
155   ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr);
156   ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */
157 
158   /* MAIN ITERATION LOOP BEGINNING*/
159   /* keep iterating until we have converged OR generated the max number
160      of directions OR reached the max number of iterations for the method */
161   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
162     if (loc_it) {
163       ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
164       ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
165     }
166     pipefgmres->it = (loc_it - 1);
167 
168     /* see if more space is needed for work vectors */
169     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
170       ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
171       /* (loc_it+1) is passed in as number of the first vector that should
172          be allocated */
173     }
174 
175     /* Note that these inner products are with "Z" now, so
176        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
177        not the value from the equivalent FGMRES run (even in exact arithmetic)
178        That is, the H we need for the Arnoldi relation is different from the
179        coefficients we use in the orthogonalization process,because of the shift */
180 
181     /* Do some local twiddling to allow for a single reduction */
182     for(i=0;i<loc_it+1;i++){
183       redux[i] = VEC_VV(i);
184     }
185     redux[loc_it+1] = ZVEC(loc_it);
186 
187     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
188     ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
189 
190     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
191     ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr);
192 
193     /* The work to be overlapped with the inner products follows.
194        This is application of the preconditioner and the operator
195        to compute intermediate quantites which will be combined (locally)
196        with the results of the inner products.
197        */
198     ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr);
199     ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
200     ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr);
201 
202     /* Compute inner products of the new direction with previous directions,
203        and the norm of the to-be-orthogonalized direction "Z".
204        This information is enough to build the required entries
205        of H. The inner product with VEC_VV(it_loc) is
206        *different* than in the standard FGMRES and need to be dealt with specially.
207        That is, for standard FGMRES the orthogonalization coefficients are the same
208        as the coefficients used in the Arnoldi relation to reconstruct, but here this
209        is not true (albeit only for the one entry of H which we "unshift" below. */
210 
211     /* Finish the dot product, retrieving the extra entry */
212     ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
213     tt = PetscRealPart(lhh[loc_it+1]);
214 
215     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
216       Note that the Hessenberg entries require a shift, as these are for the
217       relation AU = VH, which is wrt unshifted basis vectors */
218     hh = HH(0,loc_it); hes=HES(0,loc_it);
219     for (j=0; j<loc_it; j++) {
220       hh[j]  = lhh[j];
221       hes[j] = lhh[j];
222     }
223     hh[loc_it]  = lhh[loc_it] + shift;
224     hes[loc_it] = lhh[loc_it] + shift;
225 
226     /* we delay applying the shift here */
227     for (j=0; j<=loc_it; j++) {
228       lhh[j]        = -lhh[j]; /* flip sign */
229     }
230 
231     /* Compute the norm of the un-normalized new direction using the rearranged formula
232        Note that these are shifted ("barred") quantities */
233     for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
234     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
235     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0 ;
236     if (tt < 0.0) {
237       /* If we detect square root breakdown in the norm, we must restart the algorithm.
238          Here this means we simply break the current loop and reconstruct the solution
239          using the basis we have computed thus far. Note that by breaking immediately,
240          we do not update the iteration count, so computation done in this iteration
241          should be disregarded.
242          */
243       ierr = PetscInfo2(ksp,"Restart due to square root breakdown at it = %D, tt=%g\n",ksp->its,(double)tt);CHKERRQ(ierr);
244       break;
245     } else {
246       tt = PetscSqrtReal(tt);
247     }
248 
249     /* new entry in hessenburg is the 2-norm of our new direction */
250     hh[loc_it+1]  = tt;
251     hes[loc_it+1] = tt;
252 
253     /* The recurred computation for the new direction
254        The division by tt is delayed to the happy breakdown check later
255        Note placement BEFORE the unshift
256        */
257     ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr);
258     ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr);
259     /* (VEC_VV(loc_it+1) is not normalized yet) */
260 
261     /* The recurred computation for the preconditioned vector (u) */
262     ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr);
263     ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr);
264     ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
265 
266     /* Unshift an entry in the GS coefficients ("removing the bar") */
267     lhh[loc_it]         -= shift;
268 
269     /* The recurred computation for z (Au)
270        Note placement AFTER the "unshift" */
271     ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr);
272     ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr);
273     ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
274 
275     /* Happy Breakdown Check */
276     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
277     /* RS(loc_it) contains the res_norm from the last iteration  */
278     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
279     if (tt > hapbnd) {
280       /* scale new direction by its norm  */
281       ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr);
282     } else {
283       /* This happens when the solution is exactly reached. */
284       /* So there is no new direction... */
285       ierr   = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);     /* set VEC_TEMP to 0 */
286       hapend = PETSC_TRUE;
287     }
288     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
289        current solution would not be exact if HES was singular.  Note that
290        HH non-singular implies that HES is not singular, and HES is guaranteed
291        to be nonsingular when PREVECS are linearly independent and A is
292        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
293        of HES). So we should really add a check to verify that HES is nonsingular.*/
294 
295     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
296        here to check that the hessenberg matrix is indeed non-singular (since
297        FGMRES does not guarantee this) */
298 
299     /* Now apply rotations to new col of hessenberg (and right side of system),
300        calculate new rotation, and get new residual norm at the same time*/
301     ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr);
302     if (ksp->reason) break;
303 
304     loc_it++;
305     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */
306 
307     ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
308     ksp->its++;
309     ksp->rnorm = res_norm;
310     ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
311 
312     ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
313 
314     /* Catch error in happy breakdown and signal convergence and break from loop */
315     if (hapend) {
316       if (!ksp->reason) {
317         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
318         else {
319           ksp->reason = KSP_DIVERGED_BREAKDOWN;
320           break;
321         }
322       }
323     }
324   }
325   /* END OF ITERATION LOOP */
326   ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
327 
328   /*
329      Monitor if we know that we will not return for a restart */
330   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
331     ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
332   }
333 
334   if (itcount) *itcount = loc_it;
335 
336   /*
337     Down here we have to solve for the "best" coefficients of the Krylov
338     columns, add the solution values together, and possibly unwind the
339     preconditioning from the solution
340    */
341 
342   /* Form the solution (or the solution so far) */
343   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
344      properly navigates */
345 
346   ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);
347 
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
353 
354 
355    Input Parameter:
356 .     ksp - the Krylov space object that was set to use pipefgmres
357 
358    Output Parameter:
359 .     outits - number of iterations used
360 
361 */
362 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
363 {
364   PetscErrorCode ierr;
365   PetscInt       its,itcount;
366   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
367   PetscBool      guess_zero = ksp->guess_zero;
368 
369   PetscFunctionBegin;
370 
371   /* We have not checked these routines for use with complex numbers. The inner products
372      are likely not defined correctly for that case */
373 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
374   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
375 #endif
376 
377   ierr = PetscCitationsRegister(citation,&cited);CHKERRQ(ierr);
378 
379   if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
380   ierr     = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
381   ksp->its = 0;
382   ierr     = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
383 
384   itcount     = 0;
385   ksp->reason = KSP_CONVERGED_ITERATING;
386   while (!ksp->reason) {
387     ierr     = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr);
388     ierr     = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr);
389     itcount += its;
390     if (itcount >= ksp->max_it) {
391       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
392       break;
393     }
394     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
395   }
396   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
397   PetscFunctionReturn(0);
398 }
399 
400 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
401 {
402   PetscErrorCode ierr;
403 
404   PetscFunctionBegin;
405   ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr);
406   ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr);
407   PetscFunctionReturn(0);
408 }
409 
410 /*
411     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
412                       current iterates.
413 
414     Input parameters:
415         nrs - work area of size it + 1.
416         vguess  - index of initial guess
417         vdest - index of result.  Note that vguess may == vdest (replace
418                 guess with the solution).
419         it - HH upper triangular part is a block of size (it+1) x (it+1)
420 
421      This is an internal routine that knows about the PIPEFGMRES internals.
422  */
423 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
424 {
425   PetscScalar    tt;
426   PetscErrorCode ierr;
427   PetscInt       k,j;
428   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
429 
430   PetscFunctionBegin;
431   /* Solve for solution vector that minimizes the residual */
432 
433   if (it < 0) {                                 /* no pipefgmres steps have been performed */
434     ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */
435     PetscFunctionReturn(0);
436   }
437 
438   /* solve the upper triangular system - RS is the right side and HH is
439      the upper triangular matrix  - put soln in nrs */
440   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
441   else nrs[it] = 0.0;
442 
443   for (k=it-1; k>=0; k--) {
444     tt = *RS(k);
445     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
446     nrs[k] = tt / *HH(k,k);
447   }
448 
449   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
450   ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr);
451   ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr);
452 
453   /* add solution to previous solution */
454   if (vdest == vguess) {
455     ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
456   } else {
457     ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr);
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 /*
463 
464     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
465                             Return new residual.
466 
467     input parameters:
468 
469 .        ksp -    Krylov space object
470 .        it  -    plane rotations are applied to the (it+1)th column of the
471                   modified hessenberg (i.e. HH(:,it))
472 .        hapend - PETSC_FALSE not happy breakdown ending.
473 
474     output parameters:
475 .        res - the new residual
476 
477  */
478 /*
479 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
480  */
481 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
482 {
483   PetscScalar    *hh,*cc,*ss,*rs;
484   PetscInt       j;
485   PetscReal      hapbnd;
486   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
487   PetscErrorCode ierr;
488 
489   PetscFunctionBegin;
490   hh = HH(0,it);   /* pointer to beginning of column to update */
491   cc = CC(0);      /* beginning of cosine rotations */
492   ss = SS(0);      /* beginning of sine rotations */
493   rs = RS(0);      /* right hand side of least squares system */
494 
495   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
496   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];
497 
498   /* check for the happy breakdown */
499   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
500   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
501     ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
502     *hapend = PETSC_TRUE;
503   }
504 
505   /* Apply all the previously computed plane rotations to the new column
506      of the Hessenberg matrix */
507   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
508      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
509 
510   for (j=0; j<it; j++) {
511     PetscScalar hhj = hh[j];
512     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
513     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
514   }
515 
516   /*
517     compute the new plane rotation, and apply it to:
518      1) the right-hand-side of the Hessenberg system (RS)
519         note: it affects RS(it) and RS(it+1)
520      2) the new column of the Hessenberg matrix
521         note: it affects HH(it,it) which is currently pointed to
522         by hh and HH(it+1, it) (*(hh+1))
523     thus obtaining the updated value of the residual...
524   */
525 
526   /* compute new plane rotation */
527 
528   if (!*hapend) {
529     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
530     if (delta == 0.0) {
531       ksp->reason = KSP_DIVERGED_NULL;
532       PetscFunctionReturn(0);
533     }
534 
535     cc[it] = hh[it] / delta;    /* new cosine value */
536     ss[it] = hh[it+1] / delta;  /* new sine value */
537 
538     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
539     rs[it+1] = -ss[it]*rs[it];
540     rs[it]   = PetscConj(cc[it])*rs[it];
541     *res     = PetscAbsScalar(rs[it+1]);
542   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
543             another rotation matrix (so RH doesn't change).  The new residual is
544             always the new sine term times the residual from last time (RS(it)),
545             but now the new sine rotation would be zero...so the residual should
546             be zero...so we will multiply "zero" by the last residual.  This might
547             not be exactly what we want to do here -could just return "zero". */
548 
549     *res = 0.0;
550   }
551   PetscFunctionReturn(0);
552 }
553 
554 /*
555    KSPBuildSolution_PIPEFGMRES
556 
557      Input Parameter:
558 .     ksp - the Krylov space object
559 .     ptr-
560 
561    Output Parameter:
562 .     result - the solution
563 
564    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
565    calls directly.
566 
567 */
568 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
569 {
570   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
571   PetscErrorCode ierr;
572 
573   PetscFunctionBegin;
574   if (!ptr) {
575     if (!pipefgmres->sol_temp) {
576       ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr);
577       ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr);
578     }
579     ptr = pipefgmres->sol_temp;
580   }
581   if (!pipefgmres->nrs) {
582     /* allocate the work area */
583     ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr);
584     ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr);
585   }
586 
587   ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr);
588   if (result) *result = ptr;
589   PetscFunctionReturn(0);
590 }
591 
592 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
593 {
594   PetscErrorCode ierr;
595   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
596   PetscBool      flg;
597   PetscScalar    shift;
598 
599   PetscFunctionBegin;
600   ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr);
601   ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr);
602   ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr);
603   if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); }
604   ierr = PetscOptionsTail();CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
609 {
610   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
611   PetscErrorCode ierr;
612   PetscBool      iascii,isstring;
613 
614   PetscFunctionBegin;
615   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
616   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
617 
618   if (iascii) {
619     ierr = PetscViewerASCIIPrintf(viewer,"  restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr);
620     ierr = PetscViewerASCIIPrintf(viewer,"  happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr);
621 #if defined(PETSC_USE_COMPLEX)
622     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
623 #else
624     ierr = PetscViewerASCIIPrintf(viewer,"  shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
625 #endif
626   } else if (isstring) {
627     ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr);
628 #if defined(PETSC_USE_COMPLEX)
629     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
630 #else
631     ierr = PetscViewerStringSPrintf(viewer,"   shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
632 #endif
633   }
634   PetscFunctionReturn(0);
635 }
636 
637 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
638 {
639   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
640   PetscErrorCode   ierr;
641   PetscInt         i;
642 
643   PetscFunctionBegin;
644   ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr);
645   ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr);
646   for (i=0; i<pipefgmres->nwork_alloc; i++) {
647     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr);
648     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr);
649   }
650   ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr);
651   ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr);
652   ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr);
653   ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 /*MC
658    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
659 
660    A flexible, 1-stage pipelined variant of GMRES.
661 
662    Options Database Keys:
663 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
664 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
665 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
666 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
667                              vectors are allocated as needed)
668 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
669 
670 
671    Level: intermediate
672 
673    Notes:
674 
675    This variant is not "explicitly normalized" like KSPPGMRES, and requires a shift parameter.
676 
677    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
678 
679    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with KSPFGMRES).
680    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
681    See the FAQ on the PETSc website for details.
682 
683    Developer Notes: This class is subclassed off of KSPGMRES.
684 
685    Reference:
686     P. Sanan, S.M. Schnepp, and D.A. May,
687     "Pipelined, Flexible Krylov Subspace Methods,"
688     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,
689     DOI: 10.1137/15M1049130
690 
691 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES
692            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEFGMRESSetShift()
693 M*/
694 
695 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
696 {
697   KSP_PIPEFGMRES *pipefgmres;
698   PetscErrorCode ierr;
699 
700   PetscFunctionBegin;
701   ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr);
702 
703   ksp->data                              = (void*)pipefgmres;
704   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
705   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
706   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
707   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
708   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
709   ksp->ops->view                         = KSPView_PIPEFGMRES;
710   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
711   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
712   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
713 
714   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr);
715 
716   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr);
717   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr);
718   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr);
719 
720   pipefgmres->nextra_vecs    = 1;
721   pipefgmres->haptol         = 1.0e-30;
722   pipefgmres->q_preallocate  = 0;
723   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
724   pipefgmres->orthog         = 0;
725   pipefgmres->nrs            = 0;
726   pipefgmres->sol_temp       = 0;
727   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
728   pipefgmres->Rsvd           = 0;
729   pipefgmres->orthogwork     = 0;
730   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
731   pipefgmres->shift          = 1.0;
732   PetscFunctionReturn(0);
733 }
734 
735 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
736 {
737   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
738   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
739   PetscInt       nalloc;                            /* number to allocate */
740   PetscErrorCode ierr;
741   PetscInt       k;
742 
743   PetscFunctionBegin;
744   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
745                                       in a single chunk */
746 
747   /* Adjust the number to allocate to make sure that we don't exceed the
748      number of available slots (pipefgmres->vecs_allocated)*/
749   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
750     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
751   }
752   if (!nalloc) PetscFunctionReturn(0);
753 
754   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
755 
756   /* work vectors */
757   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr);
758   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr);
759   for (k=0; k < nalloc; k++) {
760     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
761   }
762   /* specify size of chunk allocated */
763   pipefgmres->mwork_alloc[nwork] = nalloc;
764 
765   /* preconditioned vectors (note we don't use VEC_OFFSET) */
766   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
767   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr);
768   for (k=0; k < nalloc; k++) {
769     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
770   }
771 
772   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
773   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr);
774   for (k=0; k < nalloc; k++) {
775     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
776   }
777 
778   /* increment the number of work vector chunks */
779   pipefgmres->nwork_alloc++;
780   PetscFunctionReturn(0);
781 }
782 /*@
783   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
784 
785   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
786 
787 Logically Collective on KSP
788 
789 Input Parameters:
790 +  ksp - the Krylov space context
791 -  shift - the shift
792 
793 Level: intermediate
794 
795 Options Database:
796 . -ksp_pipefgmres_shift <shift>
797 
798 .seealso: KSPComputeEigenvalues()
799 @*/
800 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
801 {
802   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
803 
804   PetscFunctionBegin;
805   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
806   PetscValidLogicalCollectiveScalar(ksp,shift,2);
807   pipefgmres->shift = shift;
808   PetscFunctionReturn(0);
809 }
810