xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision aca0776feee4da889fbf4f9f3c60ccde70044ebc)
1 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I  "petscksp.h"  I*/
2 
3 static PetscBool  cited      = PETSC_FALSE;
4 static const char citation[] = "@article{SSM2016,\n"
5                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
6                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
7                                "  journal = {SIAM Journal on Scientific Computing},\n"
8                                "  volume = {38},\n"
9                                "  number = {5},\n"
10                                "  pages = {C441-C470},\n"
11                                "  year = {2016},\n"
12                                "  doi = {10.1137/15M1049130},\n"
13                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
14                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
15                                "}\n";
16 
17 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt);
18 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
19 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
20 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
21 
22 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
23 {
24   PetscInt        k;
25   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
26   const PetscInt  max_k      = pipefgmres->max_k;
27 
28   PetscFunctionBegin;
29   PetscCall(KSPSetUp_GMRES(ksp));
30 
31   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs));
32   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work));
33 
34   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL));
35   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
36 
37   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs));
38   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work));
39 
40   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux));
41 
42   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL));
43   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
44 
45   PetscFunctionReturn(PETSC_SUCCESS);
46 }
47 
48 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp)
49 {
50   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
51   PetscReal       res_norm;
52   PetscReal       hapbnd, tt;
53   PetscScalar    *hh, *hes, *lhh, shift = pipefgmres->shift;
54   PetscBool       hapend = PETSC_FALSE;      /* indicates happy breakdown ending */
55   PetscInt        loc_it;                    /* local count of # of dir. in Krylov space */
56   PetscInt        max_k = pipefgmres->max_k; /* max # of directions Krylov space */
57   PetscInt        i, j, k;
58   Mat             Amat, Pmat;
59   Vec             Q, W;                      /* Pipelining vectors */
60   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
61 
62   PetscFunctionBegin;
63   if (itcount) *itcount = 0;
64 
65   /* Assign simpler names to these vectors, allocated as pipelining workspace */
66   Q = VEC_Q;
67   W = VEC_W;
68 
69   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
70   /* Note that we add an extra value here to allow for a single reduction */
71   if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork));
72   lhh = pipefgmres->orthogwork;
73 
74   /* Number of pseudo iterations since last restart is the number
75      of prestart directions */
76   loc_it = 0;
77 
78   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
79      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
80      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
81      (loc_it -1) is passed, so the two are equivalent */
82   pipefgmres->it = (loc_it - 1);
83 
84   /* initial residual is in VEC_VV(0)  - compute its norm*/
85   PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm));
86 
87   /* first entry in right-hand-side of hessenberg system is just
88      the initial residual norm */
89   *RS(0) = res_norm;
90 
91   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
92   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
93   else ksp->rnorm = 0;
94   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
95   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
96   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
97 
98   /* check for the convergence - maybe the current guess is good enough */
99   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
100   if (ksp->reason) {
101     if (itcount) *itcount = 0;
102     PetscFunctionReturn(PETSC_SUCCESS);
103   }
104 
105   /* scale VEC_VV (the initial residual) */
106   PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm));
107 
108   /* Fill the pipeline */
109   PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it)));
110   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
111   PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it)));
112   PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */
113 
114   /* MAIN ITERATION LOOP BEGINNING*/
115   /* keep iterating until we have converged OR generated the max number
116      of directions OR reached the max number of iterations for the method */
117   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
118     if (loc_it) {
119       PetscCall(KSPLogResidualHistory(ksp, res_norm));
120       PetscCall(KSPMonitor(ksp, ksp->its, res_norm));
121     }
122     pipefgmres->it = (loc_it - 1);
123 
124     /* see if more space is needed for work vectors */
125     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
126       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1));
127       /* (loc_it+1) is passed in as number of the first vector that should be allocated */
128     }
129 
130     /* Note that these inner products are with "Z" now, so
131        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
132        not the value from the equivalent FGMRES run (even in exact arithmetic)
133        That is, the H we need for the Arnoldi relation is different from the
134        coefficients we use in the orthogonalization process,because of the shift */
135 
136     /* Do some local twiddling to allow for a single reduction */
137     for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i);
138     redux[loc_it + 1] = ZVEC(loc_it);
139 
140     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
141     PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh));
142 
143     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
144     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
145 
146     /* The work to be overlapped with the inner products follows.
147        This is application of the preconditioner and the operator
148        to compute intermediate quantites which will be combined (locally)
149        with the results of the inner products.
150        */
151     PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q));
152     PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
153     PetscCall(KSP_MatMult(ksp, Amat, Q, W));
154 
155     /* Compute inner products of the new direction with previous directions,
156        and the norm of the to-be-orthogonalized direction "Z".
157        This information is enough to build the required entries
158        of H. The inner product with VEC_VV(it_loc) is
159        *different* than in the standard FGMRES and need to be dealt with specially.
160        That is, for standard FGMRES the orthogonalization coefficients are the same
161        as the coefficients used in the Arnoldi relation to reconstruct, but here this
162        is not true (albeit only for the one entry of H which we "unshift" below. */
163 
164     /* Finish the dot product, retrieving the extra entry */
165     PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh));
166     tt = PetscRealPart(lhh[loc_it + 1]);
167 
168     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
169       Note that the Hessenberg entries require a shift, as these are for the
170       relation AU = VH, which is wrt unshifted basis vectors */
171     hh  = HH(0, loc_it);
172     hes = HES(0, loc_it);
173     for (j = 0; j < loc_it; j++) {
174       hh[j]  = lhh[j];
175       hes[j] = lhh[j];
176     }
177     hh[loc_it]  = lhh[loc_it] + shift;
178     hes[loc_it] = lhh[loc_it] + shift;
179 
180     /* we delay applying the shift here */
181     for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ }
182 
183     /* Compute the norm of the un-normalized new direction using the rearranged formula
184        Note that these are shifted ("barred") quantities */
185     for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
186     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
187     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0;
188     if (tt < 0.0) {
189       /* If we detect square root breakdown in the norm, we must restart the algorithm.
190          Here this means we simply break the current loop and reconstruct the solution
191          using the basis we have computed thus far. Note that by breaking immediately,
192          we do not update the iteration count, so computation done in this iteration
193          should be disregarded.
194          */
195       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt));
196       break;
197     } else {
198       tt = PetscSqrtReal(tt);
199     }
200 
201     /* new entry in hessenburg is the 2-norm of our new direction */
202     hh[loc_it + 1]  = tt;
203     hes[loc_it + 1] = tt;
204 
205     /* The recurred computation for the new direction
206        The division by tt is delayed to the happy breakdown check later
207        Note placement BEFORE the unshift
208        */
209     PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1)));
210     PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0)));
211     /* (VEC_VV(loc_it+1) is not normalized yet) */
212 
213     /* The recurred computation for the preconditioned vector (u) */
214     PetscCall(VecCopy(Q, PREVEC(loc_it + 1)));
215     PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0)));
216     if (tt) PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt));
217 
218     /* Unshift an entry in the GS coefficients ("removing the bar") */
219     lhh[loc_it] -= shift;
220 
221     /* The recurred computation for z (Au)
222        Note placement AFTER the "unshift" */
223     PetscCall(VecCopy(W, ZVEC(loc_it + 1)));
224     PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0)));
225     if (tt) PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt));
226 
227     /* Happy Breakdown Check */
228     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
229     /* RS(loc_it) contains the res_norm from the last iteration  */
230     hapbnd = PetscMin(pipefgmres->haptol, hapbnd);
231     if (tt > hapbnd) {
232       /* scale new direction by its norm  */
233       PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt));
234     } else {
235       /* This happens when the solution is exactly reached. */
236       /* So there is no new direction... */
237       PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */
238       hapend = PETSC_TRUE;
239     }
240     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
241        current solution would not be exact if HES was singular.  Note that
242        HH non-singular implies that HES is not singular, and HES is guaranteed
243        to be nonsingular when PREVECS are linearly independent and A is
244        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
245        of HES). So we should really add a check to verify that HES is nonsingular.*/
246 
247     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
248        here to check that the hessenberg matrix is indeed non-singular (since
249        FGMRES does not guarantee this) */
250 
251     /* Now apply rotations to new col of hessenberg (and right side of system),
252        calculate new rotation, and get new residual norm at the same time*/
253     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm));
254     if (ksp->reason) break;
255 
256     loc_it++;
257     pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */
258 
259     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
260     ksp->its++;
261     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
262     else ksp->rnorm = 0;
263     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
264 
265     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
266 
267     /* Catch error in happy breakdown and signal convergence and break from loop */
268     if (hapend) {
269       if (!ksp->reason) {
270         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm);
271         ksp->reason = KSP_DIVERGED_BREAKDOWN;
272         break;
273       }
274     }
275   }
276   /* END OF ITERATION LOOP */
277   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
278 
279   /*
280      Monitor if we know that we will not return for a restart */
281   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
282 
283   if (itcount) *itcount = loc_it;
284 
285   /*
286     Solve for the "best" coefficients of the Krylov
287     columns, add the solution values together, and possibly unwind the
288     preconditioning from the solution
289    */
290 
291   /* Form the solution (or the solution so far) */
292   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln properly navigates */
293 
294   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1));
295   PetscFunctionReturn(PETSC_SUCCESS);
296 }
297 
298 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
299 {
300   PetscInt        its, itcount;
301   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
302   PetscBool       guess_zero = ksp->guess_zero;
303 
304   PetscFunctionBegin;
305   /* We have not checked these routines for use with complex numbers. The inner products are likely not defined correctly for that case */
306   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
307 
308   PetscCall(PetscCitationsRegister(citation, &cited));
309 
310   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
311   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
312   ksp->its = 0;
313   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
314 
315   itcount     = 0;
316   ksp->reason = KSP_CONVERGED_ITERATING;
317   while (!ksp->reason) {
318     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
319     PetscCall(KSPPIPEFGMRESCycle(&its, ksp));
320     itcount += its;
321     if (itcount >= ksp->max_it) {
322       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
323       break;
324     }
325     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
326   }
327   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
328   PetscFunctionReturn(PETSC_SUCCESS);
329 }
330 
331 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
332 {
333   PetscFunctionBegin;
334   PetscCall(KSPReset_PIPEFGMRES(ksp));
335   PetscCall(KSPDestroy_GMRES(ksp));
336   PetscFunctionReturn(PETSC_SUCCESS);
337 }
338 
339 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it)
340 {
341   PetscScalar     tt;
342   PetscInt        k, j;
343   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
344 
345   PetscFunctionBegin;
346   if (it < 0) {                        /* no pipefgmres steps have been performed */
347     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
348     PetscFunctionReturn(PETSC_SUCCESS);
349   }
350 
351   /* Solve for solution vector that minimizes the residual */
352   /* solve the upper triangular system - RS is the right side and HH is
353      the upper triangular matrix  - put soln in nrs */
354   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
355   else nrs[it] = 0.0;
356 
357   for (k = it - 1; k >= 0; k--) {
358     tt = *RS(k);
359     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
360     nrs[k] = tt / *HH(k, k);
361   }
362 
363   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
364   PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &PREVEC(0)));
365 
366   /* add solution to previous solution */
367   if (vdest == vguess) {
368     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
369   } else {
370     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
371   }
372   PetscFunctionReturn(PETSC_SUCCESS);
373 }
374 
375 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res)
376 {
377   PetscScalar    *hh, *cc, *ss, *rs;
378   PetscInt        j;
379   PetscReal       hapbnd;
380   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
381 
382   PetscFunctionBegin;
383   hh = HH(0, it); /* pointer to beginning of column to update */
384   cc = CC(0);     /* beginning of cosine rotations */
385   ss = SS(0);     /* beginning of sine rotations */
386   rs = RS(0);     /* right hand side of least squares system */
387 
388   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
389   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
390 
391   /* check for the happy breakdown */
392   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol);
393   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
394     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
395     *hapend = PETSC_TRUE;
396   }
397 
398   /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */
399   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
400      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
401 
402   for (j = 0; j < it; j++) {
403     PetscScalar hhj = hh[j];
404     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
405     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
406   }
407 
408   /*
409     compute the new plane rotation, and apply it to:
410      1) the right-hand-side of the Hessenberg system (RS)
411         note: it affects RS(it) and RS(it+1)
412      2) the new column of the Hessenberg matrix
413         note: it affects HH(it,it) which is currently pointed to
414         by hh and HH(it+1, it) (*(hh+1))
415     thus obtaining the updated value of the residual...
416   */
417 
418   /* compute new plane rotation */
419 
420   if (!*hapend) {
421     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
422     if (delta == 0.0) {
423       ksp->reason = KSP_DIVERGED_NULL;
424       PetscFunctionReturn(PETSC_SUCCESS);
425     }
426 
427     cc[it] = hh[it] / delta;     /* new cosine value */
428     ss[it] = hh[it + 1] / delta; /* new sine value */
429 
430     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
431     rs[it + 1] = -ss[it] * rs[it];
432     rs[it]     = PetscConj(cc[it]) * rs[it];
433     *res       = PetscAbsScalar(rs[it + 1]);
434   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
435             another rotation matrix (so RH doesn't change).  The new residual is
436             always the new sine term times the residual from last time (RS(it)),
437             but now the new sine rotation would be zero...so the residual should
438             be zero...so we will multiply "zero" by the last residual.  This might
439             not be exactly what we want to do here -could just return "zero". */
440     *res = 0.0;
441   }
442   PetscFunctionReturn(PETSC_SUCCESS);
443 }
444 
445 static PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result)
446 {
447   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
448 
449   PetscFunctionBegin;
450   if (!ptr) {
451     if (!pipefgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp));
452     ptr = pipefgmres->sol_temp;
453   }
454   if (!pipefgmres->nrs) {
455     /* allocate the work area */
456     PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs));
457   }
458 
459   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it));
460   if (result) *result = ptr;
461   PetscFunctionReturn(PETSC_SUCCESS);
462 }
463 
464 static PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
465 {
466   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
467   PetscBool       flg;
468   PetscScalar     shift;
469 
470   PetscFunctionBegin;
471   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
472   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options");
473   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg));
474   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift));
475   PetscOptionsHeadEnd();
476   PetscFunctionReturn(PETSC_SUCCESS);
477 }
478 
479 static PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer)
480 {
481   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
482   PetscBool       iascii, isstring;
483 
484   PetscFunctionBegin;
485   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
486   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
487 
488   if (iascii) {
489     PetscCall(PetscViewerASCIIPrintf(viewer, "  restart=%" PetscInt_FMT "\n", pipefgmres->max_k));
490     PetscCall(PetscViewerASCIIPrintf(viewer, "  happy breakdown tolerance %g\n", (double)pipefgmres->haptol));
491 #if defined(PETSC_USE_COMPLEX)
492     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
493 #else
494     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g\n", (double)pipefgmres->shift));
495 #endif
496   } else if (isstring) {
497     PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k));
498 #if defined(PETSC_USE_COMPLEX)
499     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
500 #else
501     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g\n", (double)pipefgmres->shift));
502 #endif
503   }
504   PetscFunctionReturn(PETSC_SUCCESS);
505 }
506 
507 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
508 {
509   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
510   PetscInt        i;
511 
512   PetscFunctionBegin;
513   PetscCall(PetscFree(pipefgmres->prevecs));
514   PetscCall(PetscFree(pipefgmres->zvecs));
515   for (i = 0; i < pipefgmres->nwork_alloc; i++) {
516     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i]));
517     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i]));
518   }
519   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
520   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
521   PetscCall(PetscFree(pipefgmres->redux));
522   PetscCall(KSPReset_GMRES(ksp));
523   PetscFunctionReturn(PETSC_SUCCESS);
524 }
525 
526 /*MC
527    KSPPIPEFGMRES - Implements the Pipelined (1-stage) Flexible Generalized Minimal Residual method. [](sec_pipelineksp). [](sec_flexibleksp)
528 
529    Options Database Keys:
530 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
531 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
532 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of vectors are allocated as needed)
533 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
534 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
535 
536    Level: intermediate
537 
538    Notes:
539    This variant is not "explicitly normalized" like `KSPPGMRES`, and requires a shift parameter.
540 
541    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
542 
543    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with `KSPFGMRES`).
544 
545    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
546    See [](doc_faq_pipelined)
547 
548    Developer Note:
549    This class is subclassed off of `KSPGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code
550 
551    Contributed by:
552    P. Sanan and S.M. Schnepp
553 
554    Reference:
555 .  * -   P. Sanan, S.M. Schnepp, and D.A. May,  "Pipelined, Flexible Krylov Subspace Methods,"
556     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,  DOI: 10.1137/15M1049130
557 
558 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
559           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
560 M*/
561 
562 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
563 {
564   KSP_PIPEFGMRES *pipefgmres;
565 
566   PetscFunctionBegin;
567   PetscCall(PetscNew(&pipefgmres));
568 
569   ksp->data                              = (void *)pipefgmres;
570   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
571   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
572   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
573   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
574   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
575   ksp->ops->view                         = KSPView_PIPEFGMRES;
576   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
577   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
578   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
579 
580   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3));
581   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
582 
583   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
584   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
585   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
586 
587   pipefgmres->nextra_vecs    = 1;
588   pipefgmres->haptol         = 1.0e-30;
589   pipefgmres->q_preallocate  = 0;
590   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
591   pipefgmres->orthog         = NULL;
592   pipefgmres->nrs            = NULL;
593   pipefgmres->sol_temp       = NULL;
594   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
595   pipefgmres->Rsvd           = NULL;
596   pipefgmres->orthogwork     = NULL;
597   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
598   pipefgmres->shift          = 1.0;
599   PetscFunctionReturn(PETSC_SUCCESS);
600 }
601 
602 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it)
603 {
604   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
605   PetscInt        nwork      = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
606   PetscInt        nalloc;                               /* number to allocate */
607   PetscInt        k;
608 
609   PetscFunctionBegin;
610   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
611                                       in a single chunk */
612 
613   /* Adjust the number to allocate to make sure that we don't exceed the
614      number of available slots (pipefgmres->vecs_allocated)*/
615   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
616   if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
617 
618   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
619 
620   /* work vectors */
621   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL));
622   for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k];
623   /* specify size of chunk allocated */
624   pipefgmres->mwork_alloc[nwork] = nalloc;
625 
626   /* preconditioned vectors (note we don't use VEC_OFFSET) */
627   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL));
628   for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k];
629 
630   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL));
631   for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k];
632 
633   /* increment the number of work vector chunks */
634   pipefgmres->nwork_alloc++;
635   PetscFunctionReturn(PETSC_SUCCESS);
636 }
637 
638 /*@
639   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined `KSPPIPEFGMRES` solver.
640 
641   Logically Collective
642 
643   Input Parameters:
644 + ksp   - the Krylov space context
645 - shift - the shift
646 
647   Options Database Key:
648 . -ksp_pipefgmres_shift <shift> - set the shift parameter
649 
650   Level: intermediate
651 
652   Note:
653   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator.
654   This can be achieved with PETSc itself by using a few iterations of a Krylov method.
655   See `KSPComputeEigenvalues()` (and note the caveats there).
656 
657 .seealso: [](ch_ksp), `KSPPIPEFGMRES`, `KSPComputeEigenvalues()`
658 @*/
659 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift)
660 {
661   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
662 
663   PetscFunctionBegin;
664   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
665   PetscValidLogicalCollectiveScalar(ksp, shift, 2);
666   pipefgmres->shift = shift;
667   PetscFunctionReturn(PETSC_SUCCESS);
668 }
669