xref: /petsc/src/tao/bound/impls/bncg/bncg.c (revision ccb4e88a40f0b86eaeca07ff64c64e4de2fae686)
#include <petsctaolinesearch.h>
#include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/
#include <petscksp.h>

#define CG_GradientDescent      0
#define CG_HestenesStiefel      1
#define CG_FletcherReeves       2
#define CG_PolakRibierePolyak   3
#define CG_PolakRibierePlus     4
#define CG_DaiYuan              5
#define CG_HagerZhang           6
#define CG_DaiKou               7
#define CG_KouDai               8
#define CG_SSML_BFGS            9
#define CG_SSML_DFP             10
#define CG_SSML_BROYDEN         11
#define CG_PCGradientDescent    12
#define CGTypes                 13

static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};

#define CG_AS_NONE       0
#define CG_AS_BERTSEKAS  1
#define CG_AS_SIZE       2

static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};

PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  PetscErrorCode               ierr;
  TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
  if (cg->inactive_idx) {
    ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr);
    ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr);
  }
  switch (asType) {
  case CG_AS_NONE:
    ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
    ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr);
    ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
    ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr);
    break;

  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr);
    ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr);
    ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
                                   &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr);
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}
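
/* Added commentary (a sketch of the two modes above, not PETSc documentation):
   with CG_AS_NONE the inactive set comes directly from VecWhichInactive(),
   i.e. roughly the variables strictly inside their bounds (or at a bound with
   the unprojected gradient pointing into the interior), and the active set is
   its complement. With CG_AS_BERTSEKAS, the trial gradient-descent point
   P[x - as_step*g] is fed to TaoEstimateActiveBounds(), which sorts variables
   within the (gradually tightened) tolerance as_tol of a bound into
   active-lower, active-upper, and fixed index sets. */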

PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
{
  PetscErrorCode               ierr;
  TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  switch (asType) {
  case CG_AS_NONE:
    ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr);
    break;

  case CG_AS_BERTSEKAS:
    ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr);
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}
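
/* Added commentary (an illustrative, hedged note on the two cases above):
   "bounding" the step prevents the line search from moving actively bounded
   variables. For example, if x_i sits at its lower bound with d_i pointing
   further outside the feasible region, CG_AS_NONE simply zeroes d_i via
   VecISSet() on the active set, while CG_AS_BERTSEKAS lets TaoBoundStep()
   adjust the components on the lower, upper, and fixed index sets
   separately. */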

static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;
  PetscReal                    step=1.0,gnorm,gnorm2, resnorm;
  PetscInt                     nDiff;

  PetscFunctionBegin;
  /* Compute the variable bounds and pass them to the line search */
  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr);

  /* Project the initial point onto the feasible region */
  ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr);

  if (nDiff > 0 || !tao->recycle) {
    ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr);
  }
  ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");

  /* Estimate the active set and compute the projected gradient */
  ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);

  /* Project the gradient and calculate the norm */
  ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
  ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  gnorm2 = gnorm*gnorm;

  /* Initialize counters */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  cg->resets = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
  ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
  ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
  ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
  ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  /* Calculate initial direction. */
  if (!tao->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }
    ierr = TaoBNCGConductIteration(tao, gnorm);CHKERRQ(ierr);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
}

static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!tao->gradient) {
    ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr);
  }
  if (!tao->stepdirection) {
    ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr);
  }
  if (!cg->W) {
    ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr);
  }
  if (!cg->work) {
    ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr);
  }
  if (!cg->sk) {
    ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr);
  }
  if (!cg->yk) {
    ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr);
  }
  if (!cg->X_old) {
    ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr);
  }
  if (!cg->G_old) {
    ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr);
  }
  if (cg->diag_scaling) {
    ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr);
    ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr);
    ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr);
  }
  if (!cg->unprojected_gradient) {
    ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr);
  }
  if (!cg->unprojected_gradient_old) {
    ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr);
  }
  ierr = MatLMVMAllocate(cg->B, cg->sk, cg->yk);CHKERRQ(ierr);
  if (cg->pc) {
    ierr = MatLMVMSetJ0(cg->B, cg->pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoDestroy_BNCG(Tao tao)
{
  TAO_BNCG       *cg = (TAO_BNCG*) tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (tao->setupcalled) {
    ierr = VecDestroy(&cg->W);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->g_work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->d_work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->y_work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->sk);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->yk);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
  ierr = MatDestroy(&cg->B);CHKERRQ(ierr);
  if (cg->pc) {
    ierr = MatDestroy(&cg->pc);CHKERRQ(ierr);
  }
  ierr = PetscFree(tao->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao)
{
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr);
  ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for bound constrained optimization");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
  if (cg->cg_type != CG_SSML_BFGS) {
    cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
  }
  if (CG_GradientDescent == cg->cg_type) {
    cg->cg_type = CG_PCGradientDescent;
    /* Set scaling equal to none or, at best, scalar scaling. */
    cg->unscaled_restart = PETSC_TRUE;
    cg->diag_scaling = PETSC_FALSE;
  }
  ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type,NULL);CHKERRQ(ierr);

  ierr = PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL);CHKERRQ(ierr);
  if (cg->no_scaling) {
    cg->diag_scaling = PETSC_FALSE;
    cg->alpha = -1.0;
  }
  if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */
    cg->neg_xi = PETSC_TRUE;
  }
  ierr = PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL);CHKERRQ(ierr);

  ierr = PetscOptionsTail();CHKERRQ(ierr);
  ierr = MatSetFromOptions(cg->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
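
/* Example (a sketch; "myprog" is a hypothetical executable built on a standard
   TAO driver): the options registered above are consumed at runtime, e.g.

     ./myprog -tao_type bncg -tao_bncg_type kd -tao_bncg_as_type bertsekas \
              -tao_bncg_diag_scaling 0 -tao_bncg_alpha 1.0 -tao_monitor

   Passing -tao_bncg_diag_scaling 0 disables the diagonal Broyden
   preconditioner, so the scalar scaling selected by -tao_bncg_alpha is used
   instead. */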

static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
{
  PetscBool      isascii;
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %D\n", cg->skipped_updates);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %D\n", cg->resets);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %D\n", cg->pure_gd_steps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Not a descent direction: %D\n", cg->descent_error);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %D\n", cg->ls_fails);CHKERRQ(ierr);
    if (cg->diag_scaling) {
      /* The viewer is already known to be ASCII at this point */
      ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
      ierr = MatView(cg->B, viewer);CHKERRQ(ierr);
      ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
{
  PetscReal a, b, c, sig1, sig2;

  PetscFunctionBegin;
  *scale = 0.0;

  if (1.0 == alpha) {
    *scale = yts/yty;
  } else if (0.0 == alpha) {
    *scale = sts/yts;
  } else if (-1.0 == alpha) {
    *scale = 1.0;
  } else {
    a = yty;
    b = yts;
    c = sts;
    a *= alpha;
    b *= -(2.0*alpha - 1.0);
    c *= alpha - 1.0;
    sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
    sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
    /* accept the positive root as the scalar */
    if (sig1 > 0.0) {
      *scale = sig1;
    } else if (sig2 > 0.0) {
      *scale = sig2;
    } else {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
    }
  }
  PetscFunctionReturn(0);
}
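
/* Added derivation note (consistent with the code above): for generic alpha,
   the scalar tau solves the quadratic

     alpha*(y'y)*tau^2 - (2*alpha - 1)*(y's)*tau + (alpha - 1)*(s's) = 0,

   and the positive root is accepted. The special cases are the same equation
   in closed form: alpha = 1 collapses it to (y'y)*tau = y's, giving
   tau = y's/y'y; alpha = 0 leaves (y's)*tau = s's, giving tau = s's/y's (the
   two Barzilai-Borwein-type scalings); alpha = -1 is a sentinel that disables
   scaling (tau = 1). For alpha in (0,1) we have a*c < 0, because
   alpha*(alpha - 1) < 0, so the discriminant b*b - 4*a*c is positive and
   exactly one root is positive. */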

/*MC
  TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.

  Options Database Keys:
+ -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
. -tao_bncg_eta <r> - restart tolerance
. -tao_bncg_type <taocg_type> - cg formula
. -tao_bncg_as_type <none,bertsekas> - active set estimation method
. -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
. -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
. -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
. -tao_bncg_theta <r> - convex combination parameter for the Broyden method
. -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
. -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
. -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
. -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
. -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
. -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
. -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
. -tao_bncg_zeta <r> - Scaling parameter in the KD method
. -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
. -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
. -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
. -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
. -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
. -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
- -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions

  Notes:
    CG formulas are:
+ "gd" - Gradient Descent
. "fr" - Fletcher-Reeves
. "pr" - Polak-Ribiere-Polyak
. "prp" - Polak-Ribiere-Plus
. "hs" - Hestenes-Stiefel
. "dy" - Dai-Yuan
. "ssml_bfgs" - Self-Scaling Memoryless BFGS
. "ssml_dfp"  - Self-Scaling Memoryless DFP
. "ssml_brdn" - Self-Scaling Memoryless Broyden
. "hz" - Hager-Zhang (CG_DESCENT 5.3)
. "dk" - Dai-Kou (2013)
- "kd" - Kou-Dai (2015)

  Level: beginner
M*/
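
/* Example driver (a minimal sketch, not part of this file; error checking is
   abbreviated, and x, xl, xu, FormFunctionGradient, and user are placeholders
   the application must provide):

     Tao tao;
     ierr = TaoCreate(PETSC_COMM_WORLD, &tao);CHKERRQ(ierr);
     ierr = TaoSetType(tao, TAOBNCG);CHKERRQ(ierr);
     ierr = TaoSetInitialVector(tao, x);CHKERRQ(ierr);
     ierr = TaoSetVariableBounds(tao, xl, xu);CHKERRQ(ierr);
     ierr = TaoSetObjectiveAndGradientRoutine(tao, FormFunctionGradient, &user);CHKERRQ(ierr);
     ierr = TaoSetFromOptions(tao);CHKERRQ(ierr);
     ierr = TaoSolve(tao);CHKERRQ(ierr);
     ierr = TaoDestroy(&tao);CHKERRQ(ierr);
*/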

PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG       *cg;
  const char     *morethuente_type = TAOLINESEARCHMT;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  tao->ops->setup = TaoSetUp_BNCG;
  tao->ops->solve = TaoSolve_BNCG;
  tao->ops->view = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  if (!tao->max_it_changed) tao->max_it = 2000;
  if (!tao->max_funcs_changed) tao->max_funcs = 4000;

  /*  Note: nondefault values should be used for nonlinear conjugate gradient  */
  /*  method.  In particular, gtol should be less than 0.5; the value used in  */
  /*  Nocedal and Wright is 0.10.  We use the default values for the  */
  /*  linesearch because it seems to work better. */
  ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr);
  ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr);

  ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr);
  tao->data = (void*)cg;
  ierr = KSPInitializePackage();CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(cg->B, "tao_bncg_");CHKERRQ(ierr);
  ierr = MatSetType(cg->B, MATLMVMDIAGBROYDEN);CHKERRQ(ierr);

  cg->pc = NULL;

  cg->dk_eta = 0.5;
  cg->hz_eta = 0.4;
  cg->dynamic_restart = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling = PETSC_FALSE;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;
  cg->theta = 1.0;
  cg->hz_theta = 1.0;
  cg->dfp_scale = 1.0;
  cg->bfgs_scale = 1.0;
  cg->zeta = 0.1;
  cg->min_quad = 6;
  cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015 */
  cg->xi = 1.0;
  cg->neg_xi = PETSC_TRUE;
  cg->spaced_restart = PETSC_FALSE;
  cg->tol_quad = 1e-8;
  cg->as_step = 0.001;
  cg->as_tol = 0.001;
  cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter */
  cg->as_type = CG_AS_BERTSEKAS;
  cg->cg_type = CG_SSML_BFGS;
  cg->alpha = 1.0;
  cg->diag_scaling = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
{
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;
  PetscReal      scaling;

  PetscFunctionBegin;
  ++cg->resets;
  scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
  scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
  if (cg->unscaled_restart) {
    scaling = 1.0;
    ++cg->pure_gd_steps;
  }
  ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr);
  /* Also want to reset our diagonal scaling with each restart */
  if (cg->diag_scaling) {
    ierr = MatLMVMReset(cg->B, PETSC_FALSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
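
/* Added commentary: the restart direction above is d = -scaling*g with

     scaling = clamp(2*max(1, |f|) / max(||g||^2, eps_23), delta_min, delta_max),

   a safeguarded version of the classic initial-step heuristic: minimizing a
   quadratic model along -g whose value drops from f to roughly zero suggests a
   step on the order of 2|f|/||g||^2, and the clamp keeps the first trial step
   of the line search at a sane magnitude. -tao_bncg_unscaled_restart skips the
   heuristic and uses the raw steepest-descent direction. */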

PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
  TAO_BNCG  *cg = (TAO_BNCG*)tao->data;
  PetscReal quadinterp;

  PetscFunctionBegin;
  if (cg->f < cg->min_quad/10) {
    *dynrestart = PETSC_FALSE;
    PetscFunctionReturn(0);
  } /* just skip this since this strategy doesn't work well for functions near zero */
  quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old));
  if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
  else {
    cg->iter_quad = 0;
    *dynrestart = PETSC_FALSE;
  }
  if (cg->iter_quad >= cg->min_quad) {
    cg->iter_quad = 0;
    *dynrestart = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}
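
/* Added commentary: if f were exactly quadratic along the last step, the
   trapezoid rule would be exact for the linear derivative phi'(a) = g(x+a*d)'d,
   so f - fold = (stepsize/2)*(gd + gd_old) and therefore

     quadinterp = 2*(f - fold) / (stepsize*(gd + gd_old)) = 1.

   Seeing |quadinterp - 1| < tol_quad for min_quad consecutive iterations is
   taken as evidence of locally quadratic behavior, which triggers the dynamic
   restart used by the HZ, DK, and KD updates. */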

PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
{
  TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode    ierr;
  PetscReal         gamma = 1.0, tau_k, beta;
  PetscReal         tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd;
  PetscReal         gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
  PetscInt          dim;
  PetscBool         cg_restart = PETSC_FALSE;

  PetscFunctionBegin;
  /* Local curvature check to see if we need to restart */
  if (tao->niter >= 1 || tao->recycle) {
    ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
    ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
    ynorm2 = ynorm*ynorm;
    ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
    if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON) {
      cg_restart = PETSC_TRUE;
      ++cg->skipped_updates;
    }
    /* If the user wants regularly spaced restarts, take a gradient descent step every min_restart_num*n iterations, where n is the dimension */
    if (cg->spaced_restart) {
      ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
      if (0 == tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE;
    }
  }
  /* Compute the diagonal scaling vector if applicable */
  if (cg->diag_scaling) {
    ierr = MatLMVMUpdate(cg->B, tao->solution, tao->gradient);CHKERRQ(ierr);
  }

  /* A note on diagonal scaling (to be added to paper):
   For the FR, PR, PRP, and DY methods, the diagonally scaled versions
   must be derived as a preconditioned CG method rather than as
   a Hessian initialization like in the Broyden methods. */

  /* In that case, one writes the objective function as
   f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
   Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
   HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
   same under preconditioning. Note that A is diagonal, such that A^T = A. */

  /* This yields questions like what the dot product d_k^T y_k
   should look like. HZ mistakenly treats that as the same under
   preconditioning, but that is not necessarily true. */

  /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
   we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
   yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
   NOT the same if our preconditioning matrix is updated between iterations.
   This same issue is found when considering dot products of the form g_{k+1}^T y_k. */

  /* Compute CG step direction */
  if (cg_restart) {
    ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
  } else if (pcgd_fallback) {
    /* Just like preconditioned CG */
    ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
    ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
  } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
    switch (cg->cg_type) {
    case CG_PCGradientDescent:
      if (!cg->diag_scaling) {
        if (!cg->no_scaling) {
          cg->sts = step*step*dnorm*dnorm;
          ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        } else {
          tau_k = 1.0;
          ++cg->pure_gd_steps;
        }
        ierr = VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_HestenesStiefel:
      /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
      if (!cg->diag_scaling) {
        cg->sts = step*step*dnorm*dnorm;
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gkp1_yk/dk_yk;
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
        beta = gkp1_yk/dk_yk;
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_FletcherReeves:
      ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
      ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
      ynorm2 = ynorm*ynorm;
      ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
      if (!cg->diag_scaling) {
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gnorm2/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Before it's updated */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(tao->gradient, cg->g_work, &tmp);CHKERRQ(ierr);
        beta = tmp/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_PolakRibierePolyak:
      snorm = step*dnorm;
      if (!cg->diag_scaling) {
        ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gkp1_yk/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        beta = gkp1_yk/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_PolakRibierePlus:
      ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
      ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
      ynorm2 = ynorm*ynorm;
      if (!cg->diag_scaling) {
        ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gkp1_yk/gnorm2_old;
        beta = PetscMax(beta, 0.0);
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Old gtDg */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        beta = gkp1_yk/gnorm2_old;
        beta = PetscMax(beta, 0.0);
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_DaiYuan:
      /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
         SIAM Journal on Optimization 10, no. 1 (1999): 177-182. */
      if (!cg->diag_scaling) {
        ierr = VecDot(tao->stepdirection, tao->gradient, &gd);CHKERRQ(ierr);
        ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gnorm2/(gd - gd_old);
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = MatMult(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->g_work, tao->gradient, &gtDg);CHKERRQ(ierr);
        ierr = VecDot(tao->stepdirection, cg->G_old, &gd_old);CHKERRQ(ierr);
        ierr = VecDot(cg->d_work, cg->g_work, &dk_yk);CHKERRQ(ierr);
        dk_yk = dk_yk - gd_old;
        beta = gtDg/dk_yk;
        ierr = VecScale(cg->d_work, beta);CHKERRQ(ierr);
        ierr = VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);CHKERRQ(ierr);
      }
      break;

    case CG_HagerZhang:
      /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
         ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = dnorm*step;
      cg->yts = step*dk_yk;
      if (cg->use_dynamic_restart) {
        ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
      }
      if (cg->dynamic_restart) {
        ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
      } else {
        if (!cg->diag_scaling) {
          ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
          ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
          /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
          tmp = gd/dk_yk;
          beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk));
          /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
          beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
          /* d <- -t*g + beta*t*d */
          ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm*snorm;
          /* Apply the diagonal scaling to all my vectors */
          ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
          ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
          ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
          /* Construct the constant ytDgkp1 */
          ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
          /* Construct the constant for scaling Dkyk in the update */
          tmp = gd/dk_yk;
          ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
          tau_k = -tau_k*gd/(dk_yk*dk_yk);
          /* beta is the constant which adds the dk contribution */
          beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */
          /* From HZ2013, modified to account for diagonal scaling */
          ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
          ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
          beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
          /* Do the update */
          ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
        }
      }
      break;

    case CG_DaiKou:
      /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
         SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      if (!cg->diag_scaling) {
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
        tmp = gd/dk_yk;
        beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk;
        beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
        /* d <- -t*g + beta*t*d */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        cg->yty = ynorm2;
        cg->sts = snorm*snorm;
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
        /* Construct the constant ytDgkp1 */
        ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
        tau_k = tau_k*gd/(dk_yk*dk_yk);
        tmp = gd/dk_yk;
        /* beta is the constant which adds the dk contribution */
        beta = gkp1_yk/dk_yk - step*tmp - tau_k;
        /* Update this for the last term in beta */
        ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
        beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */
        ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
        ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
        beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
        /* Do the update */
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_KouDai:
      /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
         Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      if (cg->use_dynamic_restart) {
        ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
      }
      if (cg->dynamic_restart) {
        ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
      } else {
        if (!cg->diag_scaling) {
          ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
          ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
          beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
          if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) { /* 0.1 is KD's zeta parameter */
            beta = cg->zeta*tau_k*gd/(dnorm*dnorm);
            gamma = 0.0;
          } else {
            if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk;
            /* This seems to be very effective when there's no tau_k scaling.
               This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
            else {
              gamma = cg->xi*gd/dk_yk;
            }
          }
          /* d <- -t*g + beta*t*d + t*gamma*yk */
          ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm*snorm;
          ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
          ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
          /* Construct the constant ytDgkp1 */
          ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk);CHKERRQ(ierr);
          /* Construct the constant for scaling Dkyk in the update */
          gamma = gd/dk_yk;
          /* tau_k = -ytDy/(ytd)^2 * gd */
          ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
          tau_k = tau_k*gd/(dk_yk*dk_yk);
          /* beta is the constant which adds the d_k contribution */
          beta = gkp1D_yk/dk_yk - step*gamma - tau_k;
          /* Here is the requisite check */
          ierr = VecDot(tao->stepdirection, cg->g_work, &tmp);CHKERRQ(ierr);
          if (cg->neg_xi) {
            /* modified KD implementation */
            if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk;
            else {
              gamma = cg->xi*gd/dk_yk;
            }
            if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
              beta = cg->zeta*tmp/(dnorm*dnorm);
              gamma = 0.0;
            }
          } else { /* original KD 2015 implementation */
            if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
              beta = cg->zeta*tmp/(dnorm*dnorm);
              gamma = 0.0;
            } else {
              gamma = cg->xi*gd/dk_yk;
            }
          }
          /* Do the update in two steps */
          ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
          ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work);CHKERRQ(ierr);
        }
      }
      break;

    case CG_SSML_BFGS:
      /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
         Discussion Papers 269 (1977). */
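      /* Added commentary: the unpreconditioned branch below computes the
         Perry/Shanno self-scaled memoryless BFGS direction

           d+ = -tau*g+ + tau*(g+'d / d'y)*y
                + [tau*(g+'y/d'y - (y'y)(g+'d)/(d'y)^2) - step*(g+'d)/(d'y)]*d,

         which is exactly d+ = -H*g+ for the BFGS inverse-Hessian update H of
         the initial matrix tau*I with the pair (s, y) = (step*d, g+ - g). The
         diagonally scaled branch replaces tau*I by the LMVM diagonal Broyden
         matrix cg->B. */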
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      cg->yty = ynorm2;
      cg->sts = snorm*snorm;
      if (!cg->diag_scaling) {
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        tmp = gd/dk_yk;
        beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp;
        /* d <- -t*g + beta*t*d + t*tmp*yk */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        /* compute scalar gamma */
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
        gamma = gd/dk_yk;
        /* Compute scalar beta */
        beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
      }
      break;

    case CG_SSML_DFP:
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      cg->yty = ynorm2;
      cg->sts = snorm*snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        tau_k = cg->dfp_scale*tau_k;
        tmp = tau_k*gkp1_yk/cg->yty;
        beta = -step*gd/dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        /* compute scalar gamma */
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
        gamma = (gkp1_yk/tmp);
        /* Compute scalar beta */
        beta = -step*gd/dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
      }
      break;

    case CG_SSML_BROYDEN:
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = step*dk_yk;
      cg->yty = ynorm2;
      cg->sts = snorm*snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp;
        /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
           it should reproduce the tau_dfp scaling. Same with dfp_scale.   */
        tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty;
        beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        /* compute scalar gamma */
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
        gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp);
        /* Compute scalar beta */
        beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
        /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
      }
      break;

    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;
  TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
  PetscReal                    step=1.0,gnorm2,gd,dnorm=0.0;
  PetscReal                    gnorm2_old,f_old,resnorm, gnorm_old;
  PetscBool                    pcgd_fallback = PETSC_FALSE;

  PetscFunctionBegin;
  /* We are now going to perform a line search along the direction. */
  /* Store solution and gradient info before it changes */
  ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr);
  ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr);
  ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr);

  gnorm_old = gnorm;
  gnorm2_old = gnorm_old*gnorm_old;
  f_old = cg->f;
  /* Perform bounded line search. If we are recycling a solution from a previous */
  /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
  if (!(tao->recycle && 0 == tao->niter)) {
    /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
    ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
    ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
    ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);

    /*  Check linesearch failure */
    if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
      ++cg->ls_fails;
      if (cg->cg_type == CG_GradientDescent) {
        /* Nothing left to do but fail out of the optimization */
        step = 0.0;
        tao->reason = TAO_DIVERGED_LS_FAILURE;
      } else {
        /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
        ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr);
        ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr);
        ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr);
        gnorm = gnorm_old;
        gnorm2 = gnorm2_old;
        cg->f = f_old;

        /* Fall back on preconditioned CG (so long as you're not already using it) */
        if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) {
          pcgd_fallback = PETSC_TRUE;
          ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
          ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);

          ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
          ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
          ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);

          pcgd_fallback = PETSC_FALSE;
          if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
            /* Going to perform a regular gradient descent step. */
            ++cg->ls_fails;
            step = 0.0;
          }
        }
        /* Fall back on the scaled gradient step */
        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          ++cg->ls_fails;
          ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
          ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
          ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
          ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
          ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
        }

        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          /* Nothing left to do but fail out of the optimization */
          ++cg->ls_fails;
          step = 0.0;
          tao->reason = TAO_DIVERGED_LS_FAILURE;
        } else {
          /* One of the fallbacks worked. Set them both back equal to false. */
          pcgd_fallback = PETSC_FALSE;
        }
      }
    }
    /* Convergence test for line search failure */
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

    /* Standard convergence test */
    ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
    ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
    if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
    ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
    ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
    ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
  /* Assert we have an updated step and we need at least one more iteration. */
  /* Calculate the next direction */
  /* Estimate the active set at the new solution */
  ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);
  /* Compute the projected gradient and its norm */
  ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
  ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  gnorm2 = gnorm*gnorm;

  /* Calculate some quantities used in the StepDirectionUpdate. */
  ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
  /* Update the step direction. */
  ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
  ++tao->niter;
  ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);

  if (cg->cg_type != CG_GradientDescent) {
    /* Figure out which previously active variables became inactive this iteration */
    ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
    if (cg->inactive_idx && cg->inactive_old) {
      ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr);
    }
    /* Selectively reset the CG step for those freshly inactive variables */
    if (cg->new_inactives) {
      ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
      ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
      ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr);
      ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr);
      ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
      ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
    }
    /* Verify that this is a descent direction */
    ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
    ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
    if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) {
      /* Not a descent direction, so we reset back to projected gradient descent */
      ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
      ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
      ++cg->descent_error;
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
{
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectReference((PetscObject)H0);CHKERRQ(ierr);
  cg->pc = H0;
  PetscFunctionReturn(0);
}
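
/* Example (a sketch; H0 is a caller-owned placeholder, e.g. a diagonal Mat
   holding approximate Hessian diagonals): the preconditioner can be seeded
   before TaoSolve() as

     Mat H0;
     ... build H0 ...
     ierr = TaoBNCGSetH0(tao, H0);CHKERRQ(ierr);
     ierr = MatDestroy(&H0);CHKERRQ(ierr);

   (the PetscObjectReference() above lets the caller drop its own handle). The
   matrix is later installed as the initial Jacobian of the LMVM matrix via
   MatLMVMSetJ0() in TaoSetUp_BNCG(). */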