xref: /petsc/src/ksp/ksp/impls/gmres/pipefgmres/pipefgmres.c (revision e282ce78569ebe8e1bd528e4d3fd57f50f8bb8d3)
1 /*
2   Contributed by Patrick Sanan and Sascha M. Schnepp
3 */
4 
5 #include <../src/ksp/ksp/impls/gmres/pipefgmres/pipefgmresimpl.h>       /*I  "petscksp.h"  I*/
6 
7 #define PIPEFGMRES_DELTA_DIRECTIONS 10
8 #define PIPEFGMRES_DEFAULT_MAXK     30
9 
10 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP,PetscInt);
11 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP,PetscInt,PetscBool*,PetscReal*);
12 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar*,Vec,Vec,KSP,PetscInt);
13 extern PetscErrorCode KSPReset_PIPEFGMRES(KSP);
14 
15 /*
16 
17     KSPSetUp_PIPEFGMRES - Sets up the workspace needed by pipefgmres.
18 
19     This is called once, usually automatically by KSPSolve() or KSPSetUp(),
20     but can be called directly by KSPSetUp().
21 
22 */
23 #undef __FUNCT__
24 #define __FUNCT__ "KSPSetUp_PIPEFGMRES"
25 static PetscErrorCode KSPSetUp_PIPEFGMRES(KSP ksp)
26 {
27   PetscErrorCode ierr;
28   PetscInt       k;
29   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
30   const PetscInt max_k = pipefgmres->max_k;
31 
32   PetscFunctionBegin;
33   ierr = KSPSetUp_GMRES(ksp);CHKERRQ(ierr);
34 
35   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs);CHKERRQ(ierr);
36   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->prevecs_user_work);CHKERRQ(ierr);
37   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
38 
39   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->prevecs_user_work[0],0,NULL);CHKERRQ(ierr);
40   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->prevecs_user_work[0]);CHKERRQ(ierr);
41   for (k=0; k < pipefgmres->vv_allocated; k++) {
42     pipefgmres->prevecs[k] = pipefgmres->prevecs_user_work[0][k];
43   }
44 
45   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs);CHKERRQ(ierr);
46   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->zvecs_user_work);CHKERRQ(ierr);
47   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(2*sizeof(void*)));CHKERRQ(ierr);
48 
49   ierr = PetscMalloc1((VEC_OFFSET+max_k),&pipefgmres->redux);CHKERRQ(ierr);
50   ierr = PetscLogObjectMemory((PetscObject)ksp,(VEC_OFFSET+max_k)*(sizeof(void*)));CHKERRQ(ierr);
51 
52   ierr = KSPCreateVecs(ksp,pipefgmres->vv_allocated,&pipefgmres->zvecs_user_work[0],0,NULL);CHKERRQ(ierr);
53   ierr = PetscLogObjectParents(ksp,pipefgmres->vv_allocated,pipefgmres->zvecs_user_work[0]);CHKERRQ(ierr);
54   for (k=0; k < pipefgmres->vv_allocated; k++) {
55     pipefgmres->zvecs[k] = pipefgmres->zvecs_user_work[0][k];
56   }
57 
58   PetscFunctionReturn(0);
59 }
60 
61 /*
62 
63     KSPPIPEFGMRESCycle - Run pipefgmres, possibly with restart.  Return residual
64                   history if requested.
65 
66     input parameters:
67 .        pipefgmres  - structure containing parameters and work areas
68 
69     output parameters:
70 .        itcount - number of iterations used.  If null, ignored.
71 .        converged - 0 if not converged
72 
73     Notes:
74     On entry, the value in vector VEC_VV(0) should be
75     the initial residual.
76 
77 
78  */
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "KSPPIPEFGMRESCycle"
82 static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp)
83 {
84   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
85   PetscReal      res_norm;
86   PetscReal      hapbnd,tt;
87   PetscScalar    *hh,*hes,*lhh,shift = pipefgmres->shift;
88   PetscBool      hapend = PETSC_FALSE;  /* indicates happy breakdown ending */
89   PetscErrorCode ierr;
90   PetscInt       loc_it;                /* local count of # of dir. in Krylov space */
91   PetscInt       max_k = pipefgmres->max_k; /* max # of directions Krylov space */
92   PetscInt       i,j,k;
93   Mat            Amat,Pmat;
94   Vec            Q,W; /* Pipelining vectors */
95   Vec            *redux = pipefgmres->redux; /* workspace for single reduction */
96 
97   PetscFunctionBegin;
98   if (itcount) *itcount = 0;
99 
100   /* Assign simpler names to these vectors, allocated as pipelining workspace */
101   Q = VEC_Q;
102   W = VEC_W;
103 
104   /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/
105   /* Note that we add an extra value here to allow for a single reduction */
106   if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr);
107   }
108   lhh = pipefgmres->orthogwork;
109 
110   /* Number of pseudo iterations since last restart is the number
111      of prestart directions */
112   loc_it = 0;
113 
114   /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in
115      KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln.
116      Note that when KSPPIPEFGMRESBuildSoln is called from this function,
117      (loc_it -1) is passed, so the two are equivalent */
118   pipefgmres->it = (loc_it - 1);
119 
120   /* initial residual is in VEC_VV(0)  - compute its norm*/
121   ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr);
122 
123   /* first entry in right-hand-side of hessenberg system is just
124      the initial residual norm */
125   *RS(0) = res_norm;
126 
127   ksp->rnorm = res_norm;
128   ierr       = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
129   ierr       = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
130 
131   /* check for the convergence - maybe the current guess is good enough */
132   ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
133   if (ksp->reason) {
134     if (itcount) *itcount = 0;
135     PetscFunctionReturn(0);
136   }
137 
138   /* scale VEC_VV (the initial residual) */
139   ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr);
140 
141   /* Fill the pipeline */
142   ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr);
143   ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
144   ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr);
145   ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */
146 
147   /* MAIN ITERATION LOOP BEGINNING*/
148   /* keep iterating until we have converged OR generated the max number
149      of directions OR reached the max number of iterations for the method */
150   while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) {
151     if (loc_it) {
152       ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
153       ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
154     }
155     pipefgmres->it = (loc_it - 1);
156 
157     /* see if more space is needed for work vectors */
158     if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) {
159       ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr);
160       /* (loc_it+1) is passed in as number of the first vector that should
161          be allocated */
162     }
163 
164     /* Note that these inner products are with "Z" now, so
165        in particular, lhh[loc_it] is the 'barred' or 'shifted' value,
166        not the value from the equivalent FGMRES run (even in exact arithmetic)
167        That is, the H we need for the Arnoldi relation is different from the
168        coefficients we use in the orthogonalization process,because of the shift */
169 
170     /* Do some local twiddling to allow for a single reduction */
171     for(i=0;i<loc_it+1;i++){
172       redux[i] = VEC_VV(i);
173     }
174     redux[loc_it+1] = ZVEC(loc_it);
175 
176     /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */
177     ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
178 
179     /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/
180     ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr);
181 
182     /* The work to be overlapped with the inner products follows.
183        This is application of the preconditioner and the operator
184        to compute intermediate quantites which will be combined (locally)
185        with the results of the inner products.
186        */
187     ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr);
188     ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);
189     ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr);
190 
191     /* Compute inner products of the new direction with previous directions,
192        and the norm of the to-be-orthogonalized direction "Z".
193        This information is enough to build the required entries
194        of H. The inner product with VEC_VV(it_loc) is
195        *different* than in the standard FGMRES and need to be dealt with specially.
196        That is, for standard FGMRES the orthogonalization coefficients are the same
197        as the coefficients used in the Arnoldi relation to reconstruct, but here this
198        is not true (albeit only for the one entry of H which we "unshift" below. */
199 
200     /* Finish the dot product, retrieving the extra entry */
201     ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr);
202     tt = PetscRealPart(lhh[loc_it+1]);
203 
204     /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt
205       Note that the Hessenberg entries require a shift, as these are for the
206       relation AU = VH, which is wrt unshifted basis vectors */
207     hh = HH(0,loc_it); hes=HES(0,loc_it);
208     for (j=0; j<loc_it; j++) {
209       hh[j]  = lhh[j];
210       hes[j] = lhh[j];
211     }
212     hh[loc_it]  = lhh[loc_it] + shift;
213     hes[loc_it] = lhh[loc_it] + shift;
214 
215     /* we delay applying the shift here */
216     for (j=0; j<=loc_it; j++) {
217       lhh[j]        = -lhh[j]; /* flip sign */
218     }
219 
220     /* Compute the norm of the un-normalized new direction using the rearranged formula
221        Note that these are shifted ("barred") quantities */
222     for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k])));
223     if (tt < 0.0) {
224       /* If we detect square root breakdown in the norm, we must restart the algorithm.
225          Here this means we simply break the current loop and reconstruct the solution
226          using the basis we have computed thus far. Note that by breaking immediately,
227          we do not update the iteration count, so computation done in this iteration
228          should be disregarded.
229          */
230       ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr);
231       break;
232     } else {
233       tt = PetscSqrtReal(tt);
234     }
235 
236     /* new entry in hessenburg is the 2-norm of our new direction */
237     hh[loc_it+1]  = tt;
238     hes[loc_it+1] = tt;
239 
240     /* The recurred computation for the new direction
241        The division by tt is delayed to the happy breakdown check later
242        Note placement BEFORE the unshift
243        */
244     ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr);
245     ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr);
246     /* (VEC_VV(loc_it+1) is not normalized yet) */
247 
248     /* The recurred computation for the preconditioned vector (u) */
249     ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr);
250     ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr);
251     ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
252 
253     /* Unshift an entry in the GS coefficients ("removing the bar") */
254     lhh[loc_it]         -= shift;
255 
256     /* The recurred computation for z (Au)
257        Note placement AFTER the "unshift" */
258     ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr);
259     ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr);
260     ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr);
261 
262     /* Happy Breakdown Check */
263     hapbnd = PetscAbsScalar((tt) / *RS(loc_it));
264     /* RS(loc_it) contains the res_norm from the last iteration  */
265     hapbnd = PetscMin(pipefgmres->haptol,hapbnd);
266     if (tt > hapbnd) {
267       /* scale new direction by its norm  */
268       ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr);
269     } else {
270       /* This happens when the solution is exactly reached. */
271       /* So there is no new direction... */
272       ierr   = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr);     /* set VEC_TEMP to 0 */
273       hapend = PETSC_TRUE;
274     }
275     /* note that for pipefgmres we could get HES(loc_it+1, loc_it)  = 0 and the
276        current solution would not be exact if HES was singular.  Note that
277        HH non-singular implies that HES is not singular, and HES is guaranteed
278        to be nonsingular when PREVECS are linearly independent and A is
279        nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity
280        of HES). So we should really add a check to verify that HES is nonsingular.*/
281 
282     /* Note that to be thorough, in debug mode, one could call a LAPACK routine
283        here to check that the hessenberg matrix is indeed non-singular (since
284        FGMRES does not guarantee this) */
285 
286     /* Now apply rotations to new col of hessenberg (and right side of system),
287        calculate new rotation, and get new residual norm at the same time*/
288     ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr);
289     if (ksp->reason) break;
290 
291     loc_it++;
292     pipefgmres->it = (loc_it-1);   /* Add this here in case it has converged */
293 
294     ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
295     ksp->its++;
296     ksp->rnorm = res_norm;
297     ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
298 
299     ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
300 
301     /* Catch error in happy breakdown and signal convergence and break from loop */
302     if (hapend) {
303       if (!ksp->reason) {
304         if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm);
305         else {
306           ksp->reason = KSP_DIVERGED_BREAKDOWN;
307           break;
308         }
309       }
310     }
311   }
312   /* END OF ITERATION LOOP */
313   ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr);
314 
315   /*
316      Monitor if we know that we will not return for a restart */
317   if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) {
318     ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr);
319   }
320 
321   if (itcount) *itcount = loc_it;
322 
323   /*
324     Down here we have to solve for the "best" coefficients of the Krylov
325     columns, add the solution values together, and possibly unwind the
326     preconditioning from the solution
327    */
328 
329   /* Form the solution (or the solution so far) */
330   /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln
331      properly navigates */
332 
333   ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr);
334 
335   PetscFunctionReturn(0);
336 }
337 
338 /*
339     KSPSolve_PIPEFGMRES - This routine applies the PIPEFGMRES method.
340 
341 
342    Input Parameter:
343 .     ksp - the Krylov space object that was set to use pipefgmres
344 
345    Output Parameter:
346 .     outits - number of iterations used
347 
348 */
349 #undef __FUNCT__
350 #define __FUNCT__ "KSPSolve_PIPEFGMRES"
351 static PetscErrorCode KSPSolve_PIPEFGMRES(KSP ksp)
352 {
353   PetscErrorCode ierr;
354   PetscInt       its,itcount;
355   KSP_PIPEFGMRES *pipefgmres    = (KSP_PIPEFGMRES*)ksp->data;
356   PetscBool      guess_zero = ksp->guess_zero;
357 
358   PetscFunctionBegin;
359 
360   /* We have not checked these routines for use with complex numbers. The inner products
361      are likely not defined correctly for that case */
362 #if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
363   SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
364 #endif
365 
366   if (ksp->calc_sings && !pipefgmres->Rsvd) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ORDER,"Must call KSPSetComputeSingularValues() before KSPSetUp() is called");
367   ierr     = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
368   ksp->its = 0;
369   ierr     = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
370 
371   itcount     = 0;
372   ksp->reason = KSP_CONVERGED_ITERATING;
373   while (!ksp->reason) {
374     ierr     = KSPInitialResidual(ksp,ksp->vec_sol,VEC_TEMP,VEC_TEMP_MATOP,VEC_VV(0),ksp->vec_rhs);CHKERRQ(ierr);
375     ierr     = KSPPIPEFGMRESCycle(&its,ksp);CHKERRQ(ierr);
376     itcount += its;
377     if (itcount >= ksp->max_it) {
378       if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
379       break;
380     }
381     ksp->guess_zero = PETSC_FALSE; /* every future call to KSPInitialResidual() will have nonzero guess */
382   }
383   ksp->guess_zero = guess_zero; /* restore if user provided nonzero initial guess */
384   PetscFunctionReturn(0);
385 }
386 
387 #undef __FUNCT__
388 #define __FUNCT__ "KSPDestroy_PIPEFGMRES"
389 static PetscErrorCode KSPDestroy_PIPEFGMRES(KSP ksp)
390 {
391   PetscErrorCode ierr;
392 
393   PetscFunctionBegin;
394   ierr = KSPReset_PIPEFGMRES(ksp);CHKERRQ(ierr);
395   ierr = KSPDestroy_GMRES(ksp);CHKERRQ(ierr);
396   PetscFunctionReturn(0);
397 }
398 
399 /*
400     KSPPIPEFGMRESBuildSoln - create the solution from the starting vector and the
401                       current iterates.
402 
403     Input parameters:
404         nrs - work area of size it + 1.
405         vguess  - index of initial guess
406         vdest - index of result.  Note that vguess may == vdest (replace
407                 guess with the solution).
408         it - HH upper triangular part is a block of size (it+1) x (it+1)
409 
410      This is an internal routine that knows about the PIPEFGMRES internals.
411  */
412 #undef __FUNCT__
413 #define __FUNCT__ "KSPPIPEFGMRESBuildSoln"
414 static PetscErrorCode KSPPIPEFGMRESBuildSoln(PetscScalar *nrs,Vec vguess,Vec vdest,KSP ksp,PetscInt it)
415 {
416   PetscScalar    tt;
417   PetscErrorCode ierr;
418   PetscInt       k,j;
419   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
420 
421   PetscFunctionBegin;
422   /* Solve for solution vector that minimizes the residual */
423 
424   if (it < 0) {                                 /* no pipefgmres steps have been performed */
425     ierr = VecCopy(vguess,vdest);CHKERRQ(ierr); /* VecCopy() is smart, exits immediately if vguess == vdest */
426     PetscFunctionReturn(0);
427   }
428 
429   /* solve the upper triangular system - RS is the right side and HH is
430      the upper triangular matrix  - put soln in nrs */
431   if (*HH(it,it) != 0.0) nrs[it] = *RS(it) / *HH(it,it);
432   else nrs[it] = 0.0;
433 
434   for (k=it-1; k>=0; k--) {
435     tt = *RS(k);
436     for (j=k+1; j<=it; j++) tt -= *HH(k,j) * nrs[j];
437     nrs[k] = tt / *HH(k,k);
438   }
439 
440   /* Accumulate the correction to the solution of the preconditioned problem in VEC_TEMP */
441   ierr = VecZeroEntries(VEC_TEMP);CHKERRQ(ierr);
442   ierr = VecMAXPY(VEC_TEMP,it+1,nrs,&PREVEC(0));CHKERRQ(ierr);
443 
444   /* add solution to previous solution */
445   if (vdest == vguess) {
446     ierr = VecAXPY(vdest,1.0,VEC_TEMP);CHKERRQ(ierr);
447   } else {
448     ierr = VecWAXPY(vdest,1.0,VEC_TEMP,vguess);CHKERRQ(ierr);
449   }
450   PetscFunctionReturn(0);
451 }
452 
453 /*
454 
455     KSPPIPEFGMRESUpdateHessenberg - Do the scalar work for the orthogonalization.
456                             Return new residual.
457 
458     input parameters:
459 
460 .        ksp -    Krylov space object
461 .        it  -    plane rotations are applied to the (it+1)th column of the
462                   modified hessenberg (i.e. HH(:,it))
463 .        hapend - PETSC_FALSE not happy breakdown ending.
464 
465     output parameters:
466 .        res - the new residual
467 
468  */
469 #undef __FUNCT__
470 #define __FUNCT__ "KSPPIPEFGMRESUpdateHessenberg"
471 /*
472 .  it - column of the Hessenberg that is complete, PIPEFGMRES is actually computing two columns ahead of this
473  */
474 static PetscErrorCode KSPPIPEFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
475 {
476   PetscScalar    *hh,*cc,*ss,*rs;
477   PetscInt       j;
478   PetscReal      hapbnd;
479   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data);
480   PetscErrorCode ierr;
481 
482   PetscFunctionBegin;
483   hh = HH(0,it);   /* pointer to beginning of column to update */
484   cc = CC(0);      /* beginning of cosine rotations */
485   ss = SS(0);      /* beginning of sine rotations */
486   rs = RS(0);      /* right hand side of least squares system */
487 
488   /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
489   for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];
490 
491   /* check for the happy breakdown */
492   hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pipefgmres->haptol);
493   if (PetscAbsScalar(hh[it+1]) < hapbnd) {
494     ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
495     *hapend = PETSC_TRUE;
496   }
497 
498   /* Apply all the previously computed plane rotations to the new column
499      of the Hessenberg matrix */
500   /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
501      and some refs have [c   s ; -conj(s)  c] (don't be confused!) */
502 
503   for (j=0; j<it; j++) {
504     PetscScalar hhj = hh[j];
505     hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
506     hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
507   }
508 
509   /*
510     compute the new plane rotation, and apply it to:
511      1) the right-hand-side of the Hessenberg system (RS)
512         note: it affects RS(it) and RS(it+1)
513      2) the new column of the Hessenberg matrix
514         note: it affects HH(it,it) which is currently pointed to
515         by hh and HH(it+1, it) (*(hh+1))
516     thus obtaining the updated value of the residual...
517   */
518 
519   /* compute new plane rotation */
520 
521   if (!*hapend) {
522     PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
523     if (delta == 0.0) {
524       ksp->reason = KSP_DIVERGED_NULL;
525       PetscFunctionReturn(0);
526     }
527 
528     cc[it] = hh[it] / delta;    /* new cosine value */
529     ss[it] = hh[it+1] / delta;  /* new sine value */
530 
531     hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
532     rs[it+1] = -ss[it]*rs[it];
533     rs[it]   = PetscConj(cc[it])*rs[it];
534     *res     = PetscAbsScalar(rs[it+1]);
535   } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
536             another rotation matrix (so RH doesn't change).  The new residual is
537             always the new sine term times the residual from last time (RS(it)),
538             but now the new sine rotation would be zero...so the residual should
539             be zero...so we will multiply "zero" by the last residual.  This might
540             not be exactly what we want to do here -could just return "zero". */
541 
542     *res = 0.0;
543   }
544   PetscFunctionReturn(0);
545 }
546 
547 /*
548    KSPBuildSolution_PIPEFGMRES
549 
550      Input Parameter:
551 .     ksp - the Krylov space object
552 .     ptr-
553 
554    Output Parameter:
555 .     result - the solution
556 
557    Note: this calls KSPPIPEFGMRESBuildSoln - the same function that KSPPIPEFGMRESCycle
558    calls directly.
559 
560 */
561 #undef __FUNCT__
562 #define __FUNCT__ "KSPBuildSolution_PIPEFGMRES"
563 PetscErrorCode KSPBuildSolution_PIPEFGMRES(KSP ksp,Vec ptr,Vec *result)
564 {
565   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   if (!ptr) {
570     if (!pipefgmres->sol_temp) {
571       ierr = VecDuplicate(ksp->vec_sol,&pipefgmres->sol_temp);CHKERRQ(ierr);
572       ierr = PetscLogObjectParent((PetscObject)ksp,(PetscObject)pipefgmres->sol_temp);CHKERRQ(ierr);
573     }
574     ptr = pipefgmres->sol_temp;
575   }
576   if (!pipefgmres->nrs) {
577     /* allocate the work area */
578     ierr = PetscMalloc1(pipefgmres->max_k,&pipefgmres->nrs);CHKERRQ(ierr);
579     ierr = PetscLogObjectMemory((PetscObject)ksp,pipefgmres->max_k*sizeof(PetscScalar));CHKERRQ(ierr);
580   }
581 
582   ierr = KSPPIPEFGMRESBuildSoln(pipefgmres->nrs,ksp->vec_sol,ptr,ksp,pipefgmres->it);CHKERRQ(ierr);
583   if (result) *result = ptr;
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "KSPSetFromOptions_PIPEFGMRES"
589 PetscErrorCode KSPSetFromOptions_PIPEFGMRES(PetscOptionItems *PetscOptionsObject,KSP ksp)
590 {
591   PetscErrorCode ierr;
592   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
593   PetscBool      flg;
594   PetscScalar    shift;
595 
596   PetscFunctionBegin;
597   ierr = KSPSetFromOptions_GMRES(PetscOptionsObject,ksp);CHKERRQ(ierr);
598   ierr = PetscOptionsHead(PetscOptionsObject,"KSP pipelined FGMRES Options");CHKERRQ(ierr);
599   ierr = PetscOptionsScalar("-ksp_pipefgmres_shift","shift parameter","KSPPIPEFGMRESSetShift",pipefgmres->shift,&shift,&flg);CHKERRQ(ierr);
600   if (flg) { ierr = KSPPIPEFGMRESSetShift(ksp,shift);CHKERRQ(ierr); }
601   ierr = PetscOptionsTail();CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "KSPView_PIPEFGMRES"
607 PetscErrorCode KSPView_PIPEFGMRES(KSP ksp,PetscViewer viewer)
608 {
609   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
610   PetscErrorCode ierr;
611   PetscBool      iascii,isstring;
612 
613   PetscFunctionBegin;
614   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
615   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);CHKERRQ(ierr);
616 
617   if (iascii) {
618     ierr = PetscViewerASCIIPrintf(viewer,"  GMRES: restart=%D\n",pipefgmres->max_k);CHKERRQ(ierr);
619     ierr = PetscViewerASCIIPrintf(viewer,"  GMRES: happy breakdown tolerance %g\n",(double)pipefgmres->haptol);CHKERRQ(ierr);
620 #if defined(PETSC_USE_COMPLEX)
621     ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
622 #else
623     ierr = PetscViewerASCIIPrintf(viewer," PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
624 #endif
625   } else if (isstring) {
626     ierr = PetscViewerStringSPrintf(viewer,"restart %D",pipefgmres->max_k);CHKERRQ(ierr);
627 #if defined(PETSC_USE_COMPLEX)
628     ierr = PetscViewerStringSPrintf(viewer,"   PIPEFGMRES: shift=%g+%gi\n",PetscRealPart(pipefgmres->shift),PetscImaginaryPart(pipefgmres->shift));CHKERRQ(ierr);
629 #else
630     ierr = PetscViewerStringSPrintf(viewer,"   PIPEFGMRES: shift=%g\n",pipefgmres->shift);CHKERRQ(ierr);
631 #endif
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 #undef __FUNCT__
637 #define __FUNCT__ "KSPReset_PIPEFGMRES"
638 PetscErrorCode KSPReset_PIPEFGMRES(KSP ksp)
639 {
640   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
641   PetscErrorCode   ierr;
642   PetscInt         i;
643 
644   PetscFunctionBegin;
645   ierr = PetscFree(pipefgmres->prevecs);CHKERRQ(ierr);
646   ierr = PetscFree(pipefgmres->zvecs);CHKERRQ(ierr);
647   for (i=0; i<pipefgmres->nwork_alloc; i++) {
648     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->prevecs_user_work[i]);CHKERRQ(ierr);
649     ierr = VecDestroyVecs(pipefgmres->mwork_alloc[i],&pipefgmres->zvecs_user_work[i]);CHKERRQ(ierr);
650   }
651   ierr = PetscFree(pipefgmres->prevecs_user_work);CHKERRQ(ierr);
652   ierr = PetscFree(pipefgmres->zvecs_user_work);CHKERRQ(ierr);
653   ierr = PetscFree(pipefgmres->redux);CHKERRQ(ierr);
654   ierr = KSPReset_GMRES(ksp);CHKERRQ(ierr);
655   PetscFunctionReturn(0);
656 }
657 
658 /*MC
659    KSPPIPEFGMRES - Implements the Pipelined Generalized Minimal Residual method.
660 
661    A Flexible, 1-stage pipelined variant of GMRES
662 
663    This variant is not "explicitly normalized" like PGMRES, and requires a shift parameter.
664 
665    A heuristic for choosing the shift parameter is the largest eigenvalue of the preconditioned operator.
666 
667    Only right preconditioning is supported (but this preconditioner may be nonlinear, as with FGMRES)
668 
669    Options Database Keys:
670 +   -ksp_gmres_restart <restart> - the number of Krylov directions to orthogonalize against
671 .   -ksp_gmres_haptol <tol> - sets the tolerance for "happy ending" (exact convergence)
672 .   -ksp_gmres_preallocate - preallocate all the Krylov search directions initially (otherwise groups of
673 .   -ksp_pipefgmres_shift - the shift to use (defaults to 1. See KSPPIPEFGMRESSetShift()
674                              vectors are allocated as needed)
675 -   -ksp_gmres_krylov_monitor - plot the Krylov space generated
676 
677 
678    Level: beginner
679 
680    Notes:
681    MPI configuration may be necessary for reductions to make asynchronous progress, which is important for performance of pipelined methods.
682    See the FAQ on the PETSc website for details.
683 
684    Developer Notes: This object is subclassed off of KSPGMRES
685 
686 .seealso:  KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPLGMRES, KSPPIPECG, KSPPIPECR, KSPPGMRES, KSPFGMRES
687            KSPGMRESSetRestart(), KSPGMRESSetHapTol(), KSPGMRESSetPreAllocateVectors(), KSPGMRESMonitorKrylov(), KSPPIPEGMRESSetShift()
688 M*/
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "KSPCreate_PIPEFGMRES"
692 PETSC_EXTERN PetscErrorCode KSPCreate_PIPEFGMRES(KSP ksp)
693 {
694   KSP_PIPEFGMRES *pipefgmres;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = PetscNewLog(ksp,&pipefgmres);CHKERRQ(ierr);
699 
700   ksp->data                              = (void*)pipefgmres;
701   ksp->ops->buildsolution                = KSPBuildSolution_PIPEFGMRES;
702   ksp->ops->setup                        = KSPSetUp_PIPEFGMRES;
703   ksp->ops->solve                        = KSPSolve_PIPEFGMRES;
704   ksp->ops->reset                        = KSPReset_PIPEFGMRES;
705   ksp->ops->destroy                      = KSPDestroy_PIPEFGMRES;
706   ksp->ops->view                         = KSPView_PIPEFGMRES;
707   ksp->ops->setfromoptions               = KSPSetFromOptions_PIPEFGMRES;
708   ksp->ops->computeextremesingularvalues = KSPComputeExtremeSingularValues_GMRES;
709   ksp->ops->computeeigenvalues           = KSPComputeEigenvalues_GMRES;
710 
711   ierr = KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,3);CHKERRQ(ierr);
712 
713   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetPreAllocateVectors_C",KSPGMRESSetPreAllocateVectors_GMRES);CHKERRQ(ierr);
714   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESSetRestart_C",KSPGMRESSetRestart_GMRES);CHKERRQ(ierr);
715   ierr = PetscObjectComposeFunction((PetscObject)ksp,"KSPGMRESGetRestart_C",KSPGMRESGetRestart_GMRES);CHKERRQ(ierr);
716 
717   pipefgmres->nextra_vecs    = 1;
718   pipefgmres->haptol         = 1.0e-30;
719   pipefgmres->q_preallocate  = 0;
720   pipefgmres->delta_allocate = PIPEFGMRES_DELTA_DIRECTIONS;
721   pipefgmres->orthog         = 0;
722   pipefgmres->nrs            = 0;
723   pipefgmres->sol_temp       = 0;
724   pipefgmres->max_k          = PIPEFGMRES_DEFAULT_MAXK;
725   pipefgmres->Rsvd           = 0;
726   pipefgmres->orthogwork     = 0;
727   pipefgmres->cgstype        = KSP_GMRES_CGS_REFINE_NEVER;
728   pipefgmres->shift          = 1.0;
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "KSPPIPEFGMRESGetNewVectors"
734 static PetscErrorCode KSPPIPEFGMRESGetNewVectors(KSP ksp,PetscInt it)
735 {
736   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
737   PetscInt       nwork   = pipefgmres->nwork_alloc; /* number of work vector chunks allocated */
738   PetscInt       nalloc;                            /* number to allocate */
739   PetscErrorCode ierr;
740   PetscInt       k;
741 
742   PetscFunctionBegin;
743   nalloc = pipefgmres->delta_allocate; /* number of vectors to allocate
744                                       in a single chunk */
745 
746   /* Adjust the number to allocate to make sure that we don't exceed the
747      number of available slots (pipefgmres->vecs_allocated)*/
748   if (it + VEC_OFFSET + nalloc >= pipefgmres->vecs_allocated) {
749     nalloc = pipefgmres->vecs_allocated - it - VEC_OFFSET;
750   }
751   if (!nalloc) PetscFunctionReturn(0);
752 
753   pipefgmres->vv_allocated += nalloc; /* vv_allocated is the number of vectors allocated */
754 
755   /* work vectors */
756   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->user_work[nwork],0,NULL);CHKERRQ(ierr);
757   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->user_work[nwork]);CHKERRQ(ierr);
758   for (k=0; k < nalloc; k++) {
759     pipefgmres->vecs[it+VEC_OFFSET+k] = pipefgmres->user_work[nwork][k];
760   }
761   /* specify size of chunk allocated */
762   pipefgmres->mwork_alloc[nwork] = nalloc;
763 
764   /* preconditioned vectors (note we don't use VEC_OFFSET) */
765   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->prevecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
766   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->prevecs_user_work[nwork]);CHKERRQ(ierr);
767   for (k=0; k < nalloc; k++) {
768     pipefgmres->prevecs[it+k] = pipefgmres->prevecs_user_work[nwork][k];
769   }
770 
771   ierr = KSPCreateVecs(ksp,nalloc,&pipefgmres->zvecs_user_work[nwork],0,NULL);CHKERRQ(ierr);
772   ierr = PetscLogObjectParents(ksp,nalloc,pipefgmres->zvecs_user_work[nwork]);CHKERRQ(ierr);
773   for (k=0; k < nalloc; k++) {
774     pipefgmres->zvecs[it+k] = pipefgmres->zvecs_user_work[nwork][k];
775   }
776 
777   /* increment the number of work vector chunks */
778   pipefgmres->nwork_alloc++;
779   PetscFunctionReturn(0);
780 }
781 #undef __FUNCT__
782 #define __FUNCT__ "KSPPIPEFGMRESSetShift"
783 /*@
784   KSPPIPEFGMRESSetShift - Set the shift parameter for the flexible, pipelined GMRES solver.
785 
786   A heuristic is to set this to be comparable to the largest eigenvalue of the preconditioned operator. This can be acheived with PETSc itself by using a few iterations of a Krylov method. See KSPComputeEigenvalues (and note the caveats there).
787 
788 Logically Collective on KSP
789 
790 Input Parameters:
791 +  ksp - the Krylov space context
792 -  shift - the shift
793 
794 Level: intermediate
795 
796 Options Database:
797 . -ksp_pipefgmres_shift <shift>
798 
799 .seealso: KSPComputeEigenvalues()
800 @*/
801 PetscErrorCode KSPPIPEFGMRESSetShift(KSP ksp,PetscScalar shift)
802 {
803   KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)ksp->data;
804 
805   PetscFunctionBegin;
806   PetscValidHeaderSpecific(ksp,KSP_CLASSID,1);
807   PetscValidLogicalCollectiveScalar(ksp,shift,2);
808   pipefgmres->shift = shift;
809   PetscFunctionReturn(0);
810 }
811