xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision 599e5203dc761c064c74a0357650e75b048fd815)
1 
2 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h> /*I  "petscksp.h"  I*/
3 
4 static PetscBool  cited      = PETSC_FALSE;
5 static const char citation[] = "@article{SSM2016,\n"
6                                "  author = {P. Sanan and S.M. Schnepp and D.A. May},\n"
7                                "  title = {Pipelined, Flexible Krylov Subspace Methods},\n"
8                                "  journal = {SIAM Journal on Scientific Computing},\n"
9                                "  volume = {38},\n"
10                                "  number = {5},\n"
11                                "  pages = {C441-C470},\n"
12                                "  year = {2016},\n"
13                                "  doi = {10.1137/15M1049130},\n"
14                                "  URL = {http://dx.doi.org/10.1137/15M1049130},\n"
15                                "  eprint = {http://dx.doi.org/10.1137/15M1049130}\n"
16                                "}\n";
17 
18 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP, PetscInt);
19 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP, PetscInt, PetscBool *, PetscReal *);
20 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *, Vec, Vec, KSP, PetscInt);
21 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
22 
23 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
24 {
25   PetscInt        k;
26   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
27   const PetscInt  max_k      = pipefgmres->max_k;
28 
29   PetscFunctionBegin;
30   PetscCall(KSPSetUp_GMRES(ksp));
31 
32   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs));
33   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->prevecs_user_work));
34 
35   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->prevecs_user_work[0], 0, NULL));
36   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
37 
38   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs));
39   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->zvecs_user_work));
40 
41   PetscCall(PetscMalloc1((VEC_OFFSET + max_k), &pipefgmres->redux));
42 
43   PetscCall(KSPCreateVecs(ksp, pipefgmres->vv_allocated, &pipefgmres->zvecs_user_work[0], 0, NULL));
44   for (k = 0; k < pipefgmres->vv_allocated; k++) pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
45 
46   PetscFunctionReturn(PETSC_SUCCESS);
47 }
48 
49 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount, KSP ksp)
50 {
51   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
52   PetscReal       res_norm;
53   PetscReal       hapbnd, tt;
54   PetscScalar    *hh, *hes, *lhh, shift = pipefgmres->shift;
55   PetscBool       hapend = PETSC_FALSE;      /* indicates happy breakdown ending */
56   PetscInt        loc_it;                    /* local count of # of dir. in Krylov space */
57   PetscInt        max_k = pipefgmres->max_k; /* max # of directions Krylov space */
58   PetscInt        i, j, k;
59   Mat             Amat, Pmat;
60   Vec             Q, W;                      /* Pipelining vectors */
61   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
62 
63   PetscFunctionBegin;
64   if (itcount) *itcount = 0;
65 
66   /* Assign simpler names to these vectors, allocated as pipelining workspace */
67   Q = VEC_Q;
68   W = VEC_W;
69 
70   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
71   /* Note that we add an extra value here to allow for a single reduction */
72   if (!pipefgmres->orthogwork) PetscCall(PetscMalloc1(pipefgmres->max_k + 2, &pipefgmres->orthogwork));
73   lhh = pipefgmres->orthogwork;
74 
75   /* Number of pseudo iterations since last restart is the number
76      of prestart directions */
77   loc_it = 0;
78 
79   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
80      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
81      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
82      (loc_it -1) is passed, so the two are equivalent */
83   pipefgmres->it = (loc_it - 1);
84 
85   /* initial residual is in VEC_VV(0)  - compute its norm*/
86   PetscCall(VecNorm(VEC_VV(0), NORM_2, &res_norm));
87 
88   /* first entry in right-hand-side of hessenberg system is just
89      the initial residual norm */
90   *RS(0) = res_norm;
91 
92   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
93   if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
94   else ksp->rnorm = 0;
95   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
96   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
97   PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
98 
99   /* check for the convergence - maybe the current guess is good enough */
100   PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
101   if (ksp->reason) {
102     if (itcount) *itcount = 0;
103     PetscFunctionReturn(PETSC_SUCCESS);
104   }
105 
106   /* scale VEC_VV (the initial residual) */
107   PetscCall(VecScale(VEC_VV(0), 1.0 / res_norm));
108 
109   /* Fill the pipeline */
110   PetscCall(KSP_PCApply(ksp, VEC_VV(loc_it), PREVEC(loc_it)));
111   PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
112   PetscCall(KSP_MatMult(ksp, Amat, PREVEC(loc_it), ZVEC(loc_it)));
113   PetscCall(VecAXPY(ZVEC(loc_it), -shift, VEC_VV(loc_it))); /* Note shift */
114 
115   /* MAIN ITERATION LOOP BEGINNING*/
116   /* keep iterating until we have converged OR generated the max number
117      of directions OR reached the max number of iterations for the method */
118   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
119     if (loc_it) {
120       PetscCall(KSPLogResidualHistory(ksp, res_norm));
121       PetscCall(KSPMonitor(ksp, ksp->its, res_norm));
122     }
123     pipefgmres->it = (loc_it - 1);
124 
125     /* see if more space is needed for work vectors */
126     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
127       PetscCall(KSPPIPEFGMRESGetNewVectors(ksp, loc_it + 1));
128       /* (loc_it+1) is passed in as number of the first vector that should be allocated */
129     }
130 
131     /* Note that these inner products are with "Z" now, so
132        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
133        not the value from the equivalent FGMRES run (even in exact arithmetic)
134        That is, the H we need for the Arnoldi relation is different from the
135        coefficients we use in the orthogonalization process,because of the shift */
136 
137     /* Do some local twiddling to allow for a single reduction */
138     for (i = 0; i < loc_it + 1; i++) redux[i] = VEC_VV(i);
139     redux[loc_it + 1] = ZVEC(loc_it);
140 
141     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
142     PetscCall(VecMDotBegin(ZVEC(loc_it), loc_it + 2, redux, lhh));
143 
144     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
145     PetscCall(PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it))));
146 
147     /* The work to be overlapped with the inner products follows.
148        This is application of the preconditioner and the operator
149        to compute intermediate quantites which will be combined (locally)
150        with the results of the inner products.
151        */
152     PetscCall(KSP_PCApply(ksp, ZVEC(loc_it), Q));
153     PetscCall(PCGetOperators(ksp->pc, &Amat, &Pmat));
154     PetscCall(KSP_MatMult(ksp, Amat, Q, W));
155 
156     /* Compute inner products of the new direction with previous directions,
157        and the norm of the to-be-orthogonalized direction "Z".
158        This information is enough to build the required entries
159        of H. The inner product with VEC_VV(it_loc) is
160        *different* than in the standard FGMRES and need to be dealt with specially.
161        That is, for standard FGMRES the orthogonalization coefficients are the same
162        as the coefficients used in the Arnoldi relation to reconstruct, but here this
163        is not true (albeit only for the one entry of H which we "unshift" below. */
164 
165     /* Finish the dot product, retrieving the extra entry */
166     PetscCall(VecMDotEnd(ZVEC(loc_it), loc_it + 2, redux, lhh));
167     tt = PetscRealPart(lhh[loc_it + 1]);
168 
169     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
170       Note that the Hessenberg entries require a shift, as these are for the
171       relation AU = VH, which is wrt unshifted basis vectors */
172     hh  = HH(0, loc_it);
173     hes = HES(0, loc_it);
174     for (j = 0; j < loc_it; j++) {
175       hh[j]  = lhh[j];
176       hes[j] = lhh[j];
177     }
178     hh[loc_it]  = lhh[loc_it] + shift;
179     hes[loc_it] = lhh[loc_it] + shift;
180 
181     /* we delay applying the shift here */
182     for (j = 0; j <= loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ }
183 
184     /* Compute the norm of the un-normalized new direction using the rearranged formula
185        Note that these are shifted ("barred") quantities */
186     for (k = 0; k <= loc_it; k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
187     /* On AVX512 this is accumulating roundoff errors for eg: tt=-2.22045e-16 */
188     if ((tt < 0.0) && tt > -PETSC_SMALL) tt = 0.0;
189     if (tt < 0.0) {
190       /* If we detect square root breakdown in the norm, we must restart the algorithm.
191          Here this means we simply break the current loop and reconstruct the solution
192          using the basis we have computed thus far. Note that by breaking immediately,
193          we do not update the iteration count, so computation done in this iteration
194          should be disregarded.
195          */
196       PetscCall(PetscInfo(ksp, "Restart due to square root breakdown at it = %" PetscInt_FMT ", tt=%g\n", ksp->its, (double)tt));
197       break;
198     } else {
199       tt = PetscSqrtReal(tt);
200     }
201 
202     /* new entry in hessenburg is the 2-norm of our new direction */
203     hh[loc_it + 1]  = tt;
204     hes[loc_it + 1] = tt;
205 
206     /* The recurred computation for the new direction
207        The division by tt is delayed to the happy breakdown check later
208        Note placement BEFORE the unshift
209        */
210     PetscCall(VecCopy(ZVEC(loc_it), VEC_VV(loc_it + 1)));
211     PetscCall(VecMAXPY(VEC_VV(loc_it + 1), loc_it + 1, lhh, &VEC_VV(0)));
212     /* (VEC_VV(loc_it+1) is not normalized yet) */
213 
214     /* The recurred computation for the preconditioned vector (u) */
215     PetscCall(VecCopy(Q, PREVEC(loc_it + 1)));
216     PetscCall(VecMAXPY(PREVEC(loc_it + 1), loc_it + 1, lhh, &PREVEC(0)));
217     if (tt) PetscCall(VecScale(PREVEC(loc_it + 1), 1.0 / tt));
218 
219     /* Unshift an entry in the GS coefficients ("removing the bar") */
220     lhh[loc_it] -= shift;
221 
222     /* The recurred computation for z (Au)
223        Note placement AFTER the "unshift" */
224     PetscCall(VecCopy(W, ZVEC(loc_it + 1)));
225     PetscCall(VecMAXPY(ZVEC(loc_it + 1), loc_it + 1, lhh, &ZVEC(0)));
226     if (tt) PetscCall(VecScale(ZVEC(loc_it + 1), 1.0 / tt));
227 
228     /* Happy Breakdown Check */
229     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
230     /* RS(loc_it) contains the res_norm from the last iteration  */
231     hapbnd = PetscMin(pipefgmres->haptol, hapbnd);
232     if (tt > hapbnd) {
233       /* scale new direction by its norm  */
234       PetscCall(VecScale(VEC_VV(loc_it + 1), 1.0 / tt));
235     } else {
236       /* This happens when the solution is exactly reached. */
237       /* So there is no new direction... */
238       PetscCall(VecSet(VEC_TEMP, 0.0)); /* set VEC_TEMP to 0 */
239       hapend = PETSC_TRUE;
240     }
241     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
242        current solution would not be exact if HES was singular.  Note that
243        HH non-singular implies that HES is not singular, and HES is guaranteed
244        to be nonsingular when PREVECS are linearly independent and A is
245        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
246        of HES). So we should really add a check to verify that HES is nonsingular.*/
247 
248     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
249        here to check that the hessenberg matrix is indeed non-singular (since
250        FGMRES does not guarantee this) */
251 
252     /* Now apply rotations to new col of hessenberg (and right side of system),
253        calculate new rotation, and get new residual norm at the same time*/
254     PetscCall(KSPPIPEFGMRESUpdateHessenberg(ksp, loc_it, &hapend, &res_norm));
255     if (ksp->reason) break;
256 
257     loc_it++;
258     pipefgmres->it = (loc_it - 1); /* Add this here in case it has converged */
259 
260     PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
261     ksp->its++;
262     if (ksp->normtype != KSP_NORM_NONE) ksp->rnorm = res_norm;
263     else ksp->rnorm = 0;
264     PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
265 
266     PetscCall((*ksp->converged)(ksp, ksp->its, ksp->rnorm, &ksp->reason, ksp->cnvP));
267 
268     /* Catch error in happy breakdown and signal convergence and break from loop */
269     if (hapend) {
270       if (!ksp->reason) {
271         PetscCheck(!ksp->errorifnotconverged, PetscObjectComm((PetscObject)ksp), PETSC_ERR_NOT_CONVERGED, "Reached happy break down, but convergence was not indicated. Residual norm = %g", (double)res_norm);
272         ksp->reason = KSP_DIVERGED_BREAKDOWN;
273         break;
274       }
275     }
276   }
277   /* END OF ITERATION LOOP */
278   PetscCall(KSPLogResidualHistory(ksp, ksp->rnorm));
279 
280   /*
281      Monitor if we know that we will not return for a restart */
282   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) PetscCall(KSPMonitor(ksp, ksp->its, ksp->rnorm));
283 
284   if (itcount) *itcount = loc_it;
285 
286   /*
287     Solve for the "best" coefficients of the Krylov
288     columns, add the solution values together, and possibly unwind the
289     preconditioning from the solution
290    */
291 
292   /* Form the solution (or the solution so far) */
293   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln properly navigates */
294 
295   PetscCall(KSPPIPEFGMRESBuildSoln(RS(0), ksp->vec_sol, ksp->vec_sol, ksp, loc_it - 1));
296   PetscFunctionReturn(PETSC_SUCCESS);
297 }
298 
299 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
300 {
301   PetscInt        its, itcount;
302   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
303   PetscBool       guess_zero = ksp->guess_zero;
304 
305   PetscFunctionBegin;
306   /* We have not checked these routines for use with complex numbers. The inner products are likely not defined correctly for that case */
307   PetscCheck(!PetscDefined(USE_COMPLEX) || PetscDefined(SKIP_COMPLEX), PETSC_COMM_WORLD, PETSC_ERR_SUP, "PIPEFGMRES has not been implemented for use with complex scalars");
308 
309   PetscCall(PetscCitationsRegister(citation, &cited));
310 
311   PetscCheck(!ksp->calc_sings || pipefgmres->Rsvd, PetscObjectComm((PetscObject)ksp), PETSC_ERR_ORDER, "Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
312   PetscCall(PetscObjectSAWsTakeAccess((PetscObject)ksp));
313   ksp->its = 0;
314   PetscCall(PetscObjectSAWsGrantAccess((PetscObject)ksp));
315 
316   itcount     = 0;
317   ksp->reason = KSP_CONVERGED_ITERATING;
318   while (!ksp->reason) {
319     PetscCall(KSPInitialResidual(ksp, ksp->vec_sol, VEC_TEMP, VEC_TEMP_MATOP, VEC_VV(0), ksp->vec_rhs));
320     PetscCall(KSPPIPEFGMRESCycle(&its, ksp));
321     itcount += its;
322     if (itcount >= ksp->max_it) {
323       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
324       break;
325     }
326     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
327   }
328   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
329   PetscFunctionReturn(PETSC_SUCCESS);
330 }
331 
332 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
333 {
334   PetscFunctionBegin;
335   PetscCall(KSPReset_PIPEFGMRES(ksp));
336   PetscCall(KSPDestroy_GMRES(ksp));
337   PetscFunctionReturn(PETSC_SUCCESS);
338 }
339 
340 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs, Vec vguess, Vec vdest, KSP ksp, PetscInt it)
341 {
342   PetscScalar     tt;
343   PetscInt        k, j;
344   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
345 
346   PetscFunctionBegin;
347   if (it < 0) {                        /* no pipefgmres steps have been performed */
348     PetscCall(VecCopy(vguess, vdest)); /* VecCopy() is smart, exits immediately if vguess == vdest */
349     PetscFunctionReturn(PETSC_SUCCESS);
350   }
351 
352   /* Solve for solution vector that minimizes the residual */
353   /* solve the upper triangular system - RS is the right side and HH is
354      the upper triangular matrix  - put soln in nrs */
355   if (*HH(it, it) != 0.0) nrs[it] = *RS(it) / *HH(it, it);
356   else nrs[it] = 0.0;
357 
358   for (k = it - 1; k >= 0; k--) {
359     tt = *RS(k);
360     for (j = k + 1; j <= it; j++) tt -= *HH(k, j) * nrs[j];
361     nrs[k] = tt / *HH(k, k);
362   }
363 
364   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
365   PetscCall(VecMAXPBY(VEC_TEMP, it + 1, nrs, 0, &PREVEC(0)));
366 
367   /* add solution to previous solution */
368   if (vdest == vguess) {
369     PetscCall(VecAXPY(vdest, 1.0, VEC_TEMP));
370   } else {
371     PetscCall(VecWAXPY(vdest, 1.0, VEC_TEMP, vguess));
372   }
373   PetscFunctionReturn(PETSC_SUCCESS);
374 }
375 
376 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp, PetscInt it, PetscBool *hapend, PetscReal *res)
377 {
378   PetscScalar    *hh, *cc, *ss, *rs;
379   PetscInt        j;
380   PetscReal       hapbnd;
381   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)(ksp->data);
382 
383   PetscFunctionBegin;
384   hh = HH(0, it); /* pointer to beginning of column to update */
385   cc = CC(0);     /* beginning of cosine rotations */
386   ss = SS(0);     /* beginning of sine rotations */
387   rs = RS(0);     /* right hand side of least squares system */
388 
389   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
390   for (j = 0; j <= it + 1; j++) *HES(j, it) = hh[j];
391 
392   /* check for the happy breakdown */
393   hapbnd = PetscMin(PetscAbsScalar(hh[it + 1] / rs[it]), pipefgmres->haptol);
394   if (PetscAbsScalar(hh[it + 1]) < hapbnd) {
395     PetscCall(PetscInfo(ksp, "Detected happy breakdown, current hapbnd = %14.12e H(%" PetscInt_FMT ",%" PetscInt_FMT ") = %14.12e\n", (double)hapbnd, it + 1, it, (double)PetscAbsScalar(*HH(it + 1, it))));
396     *hapend = PETSC_TRUE;
397   }
398 
399   /* Apply all the previously computed plane rotations to the new column of the Hessenberg matrix */
400   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
401      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
402 
403   for (j = 0; j < it; j++) {
404     PetscScalar hhj = hh[j];
405     hh[j]           = PetscConj(cc[j]) * hhj + ss[j] * hh[j + 1];
406     hh[j + 1]       = -ss[j] * hhj + cc[j] * hh[j + 1];
407   }
408 
409   /*
410     compute the new plane rotation, and apply it to:
411      1) the right-hand-side of the Hessenberg system (RS)
412         note: it affects RS(it) and RS(it+1)
413      2) the new column of the Hessenberg matrix
414         note: it affects HH(it,it) which is currently pointed to
415         by hh and HH(it+1, it) (*(hh+1))
416     thus obtaining the updated value of the residual...
417   */
418 
419   /* compute new plane rotation */
420 
421   if (!*hapend) {
422     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it + 1])));
423     if (delta == 0.0) {
424       ksp->reason = KSP_DIVERGED_NULL;
425       PetscFunctionReturn(PETSC_SUCCESS);
426     }
427 
428     cc[it] = hh[it] / delta;     /* new cosine value */
429     ss[it] = hh[it + 1] / delta; /* new sine value */
430 
431     hh[it]     = PetscConj(cc[it]) * hh[it] + ss[it] * hh[it + 1];
432     rs[it + 1] = -ss[it] * rs[it];
433     rs[it]     = PetscConj(cc[it]) * rs[it];
434     *res       = PetscAbsScalar(rs[it + 1]);
435   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
436             another rotation matrix (so RH doesn't change).  The new residual is
437             always the new sine term times the residual from last time (RS(it)),
438             but now the new sine rotation would be zero...so the residual should
439             be zero...so we will multiply "zero" by the last residual.  This might
440             not be exactly what we want to do here -could just return "zero". */
441     *res = 0.0;
442   }
443   PetscFunctionReturn(PETSC_SUCCESS);
444 }
445 
446 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp, Vec ptr, Vec *result)
447 {
448   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
449 
450   PetscFunctionBegin;
451   if (!ptr) {
452     if (!pipefgmres->sol_temp) PetscCall(VecDuplicate(ksp->vec_sol, &pipefgmres->sol_temp));
453     ptr = pipefgmres->sol_temp;
454   }
455   if (!pipefgmres->nrs) {
456     /* allocate the work area */
457     PetscCall(PetscMalloc1(pipefgmres->max_k, &pipefgmres->nrs));
458   }
459 
460   PetscCall(KSPPIPEFGMRESBuildSoln(pipefgmres->nrs, ksp->vec_sol, ptr, ksp, pipefgmres->it));
461   if (result) *result = ptr;
462   PetscFunctionReturn(PETSC_SUCCESS);
463 }
464 
465 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(KSP ksp, PetscOptionItems *PetscOptionsObject)
466 {
467   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
468   PetscBool       flg;
469   PetscScalar     shift;
470 
471   PetscFunctionBegin;
472   PetscCall(KSPSetFromOptions_GMRES(ksp, PetscOptionsObject));
473   PetscOptionsHeadBegin(PetscOptionsObject, "KSP pipelined FGMRES Options");
474   PetscCall(PetscOptionsScalar("-ksp_pipefgmres_shift", "shift parameter", "KSPPIPEFGMRESSetShift", pipefgmres->shift, &shift, &flg));
475   if (flg) PetscCall(KSPPIPEFGMRESSetShift(ksp, shift));
476   PetscOptionsHeadEnd();
477   PetscFunctionReturn(PETSC_SUCCESS);
478 }
479 
480 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp, PetscViewer viewer)
481 {
482   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
483   PetscBool       iascii, isstring;
484 
485   PetscFunctionBegin;
486   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
487   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSTRING, &isstring));
488 
489   if (iascii) {
490     PetscCall(PetscViewerASCIIPrintf(viewer, "  restart=%" PetscInt_FMT "\n", pipefgmres->max_k));
491     PetscCall(PetscViewerASCIIPrintf(viewer, "  happy breakdown tolerance %g\n", (double)pipefgmres->haptol));
492 #if defined(PETSC_USE_COMPLEX)
493     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
494 #else
495     PetscCall(PetscViewerASCIIPrintf(viewer, "  shift=%g\n", (double)pipefgmres->shift));
496 #endif
497   } else if (isstring) {
498     PetscCall(PetscViewerStringSPrintf(viewer, "restart %" PetscInt_FMT, pipefgmres->max_k));
499 #if defined(PETSC_USE_COMPLEX)
500     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g+%gi\n", (double)PetscRealPart(pipefgmres->shift), (double)PetscImaginaryPart(pipefgmres->shift)));
501 #else
502     PetscCall(PetscViewerStringSPrintf(viewer, "   shift=%g\n", (double)pipefgmres->shift));
503 #endif
504   }
505   PetscFunctionReturn(PETSC_SUCCESS);
506 }
507 
508 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
509 {
510   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
511   PetscInt        i;
512 
513   PetscFunctionBegin;
514   PetscCall(PetscFree(pipefgmres->prevecs));
515   PetscCall(PetscFree(pipefgmres->zvecs));
516   for (i = 0; i < pipefgmres->nwork_alloc; i++) {
517     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->prevecs_user_work[i]));
518     PetscCall(VecDestroyVecs(pipefgmres->mwork_alloc[i], &pipefgmres->zvecs_user_work[i]));
519   }
520   PetscCall(PetscFree(pipefgmres->prevecs_user_work));
521   PetscCall(PetscFree(pipefgmres->zvecs_user_work));
522   PetscCall(PetscFree(pipefgmres->redux));
523   PetscCall(KSPReset_GMRES(ksp));
524   PetscFunctionReturn(PETSC_SUCCESS);
525 }
526 
527 /*MC
528    KSPPIPEFGMRES - Implements the Pipelined (1-stage) Flexible Generalized Minimal Residual method. [](sec_pipelineksp). [](sec_flexibleksp)
529 
530    Options Database Keys:
531 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
532 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
533 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of vectors are allocated as needed)
534 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
535 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
536 
537    Level: intermediate
538 
539    Notes:
540    This variant is not "explicitly normalized" like `KSPPGMRES`, and requires a shift parameter.
541 
542    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
543 
544    Only right preconditioning is supported (but this preconditioner may be nonlinear/variable/inexact, as with `KSPFGMRES`).
545 
546    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
547    See [](doc_faq_pipelined)
548 
549    Developer Note:
550    This class is subclassed off of `KSPGMRES`, see the source code in src/ksp/ksp/impls/gmres for comments on the structure of the code
551 
552    Contributed by:
553    P. Sanan and S.M. Schnepp
554 
555    Reference:
556 .  * -   P. Sanan, S.M. Schnepp, and D.A. May,  "Pipelined, Flexible Krylov Subspace Methods,"
557     SIAM Journal on Scientific Computing 2016 38:5, C441-C470,  DOI: 10.1137/15M1049130
558 
559 .seealso: [](ch_ksp), [](doc_faq_pipelined), [](sec_pipelineksp), [](sec_flexibleksp), `KSPCreate()`, `KSPSetType()`, `KSPType`, `KSP`, `KSPLGMRES`, `KSPPIPECG`, `KSPPIPECR`, `KSPPGMRES`, `KSPFGMRES`
560           `KSPGMRESSetRestart()`, `KSPGMRESSetHapTol()`, `KSPGMRESSetPreAllocateVectors()`, `KSPGMRESMonitorKrylov()`, `KSPPIPEFGMRESSetShift()`
561 M*/
562 
563 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
564 {
565   KSP_PIPEFGMRES *pipefgmres;
566 
567   PetscFunctionBegin;
568   PetscCall(PetscNew(&pipefgmres));
569 
570   ksp->data                              = (void *)pipefgmres;
571   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
572   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
573   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
574   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
575   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
576   ksp->ops->view                         = KSPView_PIPEFGMRES;
577   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
578   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
579   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
580 
581   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_UNPRECONDITIONED, PC_RIGHT, 3));
582   PetscCall(KSPSetSupportedNorm(ksp, KSP_NORM_NONE, PC_RIGHT, 1));
583 
584   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetPreAllocateVectors_C", KSPGMRESSetPreAllocateVectors_GMRES));
585   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESSetRestart_C", KSPGMRESSetRestart_GMRES));
586   PetscCall(PetscObjectComposeFunction((PetscObject)ksp, "KSPGMRESGetRestart_C", KSPGMRESGetRestart_GMRES));
587 
588   pipefgmres->nextra_vecs    = 1;
589   pipefgmres->haptol         = 1.0e-30;
590   pipefgmres->q_preallocate  = 0;
591   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
592   pipefgmres->orthog         = NULL;
593   pipefgmres->nrs            = NULL;
594   pipefgmres->sol_temp       = NULL;
595   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
596   pipefgmres->Rsvd           = NULL;
597   pipefgmres->orthogwork     = NULL;
598   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
599   pipefgmres->shift          = 1.0;
600   PetscFunctionReturn(PETSC_SUCCESS);
601 }
602 
603 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp, PetscInt it)
604 {
605   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
606   PetscInt        nwork      = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
607   PetscInt        nalloc;                               /* number to allocate */
608   PetscInt        k;
609 
610   PetscFunctionBegin;
611   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
612                                       in a single chunk */
613 
614   /* Adjust the number to allocate to make sure that we don't exceed the
615      number of available slots (pipefgmres->vecs_allocated)*/
616   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
617   if (!nalloc) PetscFunctionReturn(PETSC_SUCCESS);
618 
619   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
620 
621   /* work vectors */
622   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->user_work[nwork], 0, NULL));
623   for (k = 0; k < nalloc; k++) pipefgmres->vecs[it + VEC_OFFSET + k] = pipefgmres->user_work[nwork][k];
624   /* specify size of chunk allocated */
625   pipefgmres->mwork_alloc[nwork] = nalloc;
626 
627   /* preconditioned vectors (note we don't use VEC_OFFSET) */
628   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->prevecs_user_work[nwork], 0, NULL));
629   for (k = 0; k < nalloc; k++) pipefgmres->prevecs[it + k] = pipefgmres->prevecs_user_work[nwork][k];
630 
631   PetscCall(KSPCreateVecs(ksp, nalloc, &pipefgmres->zvecs_user_work[nwork], 0, NULL));
632   for (k = 0; k < nalloc; k++) pipefgmres->zvecs[it + k] = pipefgmres->zvecs_user_work[nwork][k];
633 
634   /* increment the number of work vector chunks */
635   pipefgmres->nwork_alloc++;
636   PetscFunctionReturn(PETSC_SUCCESS);
637 }
638 
639 /*@
640   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined `KSPPIPEFGMRES` solver.
641 
642   Logically Collective
643 
644   Input Parameters:
645 + ksp   - the Krylov space context
646 - shift - the shift
647 
648   Options Database Key:
649 . -ksp_pipefgmres_shift <shift> - set the shift parameter
650 
651   Level: intermediate
652 
653   Note:
654   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator.
655   This can be achieved with PETSc itself by using a few iterations of a Krylov method.
656   See `KSPComputeEigenvalues()` (and note the caveats there).
657 
658 .seealso: [](ch_ksp), `KSPPIPEFGMRES`, `KSPComputeEigenvalues()`
659 @*/
660 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp, PetscScalar shift)
661 {
662   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES *)ksp->data;
663 
664   PetscFunctionBegin;
665   PetscValidHeaderSpecific(ksp, KSP_CLASSID, 1);
666   PetscValidLogicalCollectiveScalar(ksp, shift, 2);
667   pipefgmres->shift = shift;
668   PetscFunctionReturn(PETSC_SUCCESS);
669 }
670