xref: /petsc/src/tao/bound/impls/bncg/bncg.c (revision cb497662fae1ebe98e095979598fa3ebb2f149fa)
1 #include <petsctaolinesearch.h>
2 #include <../src/tao/bound/impls/bncg/bncg.h>
3 #include <petscksp.h>
4 
/* Integer identifiers for the supported CG update formulas.  These double as
   indices into CG_Table below, so the #define values must stay in sync with
   the string order in that table. */
#define CG_GradientDescent      0
#define CG_HestenesStiefel      1
#define CG_FletcherReeves       2
#define CG_PolakRibierePolyak   3
#define CG_PolakRibierePlus     4
#define CG_DaiYuan              5
#define CG_HagerZhang           6
#define CG_DaiKou               7
#define CG_KouDai               8
#define CG_SSML_BFGS            9
#define CG_SSML_DFP             10
#define CG_SSML_BROYDEN         11
#define CG_PCGradientDescent    12
#define CGTypes                 13  /* total number of CG formulas above */

/* Option-name strings for -tao_bncg_type; index i names formula i.  The array
   is oversized (64 slots) but only the first CGTypes entries are populated. */
static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};

/* Active-set estimation strategies used by TaoBNCGEstimateActiveSet(). */
#define CG_AS_NONE       0
#define CG_AS_BERTSEKAS  1
#define CG_AS_SIZE       2  /* number of valid entries in CG_AS_TYPE */

/* Option-name strings for -tao_bncg_as_type; oversized like CG_Table, only
   the first CG_AS_SIZE entries are populated. */
static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};
27 
28 PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle)
29 {
30   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
31 
32   PetscFunctionBegin;
33   cg->recycle = recycle;
34   PetscFunctionReturn(0);
35 }
36 
/* Estimate which variables are actively constrained at their bounds.
   The previous inactive set is saved into cg->inactive_old before the new
   estimate is computed.  asType selects the strategy:
     CG_AS_NONE      - classify directly from the current point/gradient via
                       VecWhichInactive, active set is the complement.
     CG_AS_BERTSEKAS - take a trial gradient-descent step and let
                       TaoEstimateActiveBounds classify lower/upper/fixed sets.
   Any unknown asType silently leaves the sets unchanged. */
PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  PetscErrorCode               ierr;
  TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  /* Preserve the previous inactive set so callers can detect newly freed
     variables between iterations. */
  ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
  if (cg->inactive_idx) {
    /* NOTE(review): ISDuplicate appears to already copy the indices, making the
       ISCopy that follows look redundant — confirm against the PETSc IS API. */
    ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr);
    ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr);
  }
  switch (asType) {
  case CG_AS_NONE:
    ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
    ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr);
    ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
    ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr);
    break;

  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr);
    ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr);
    ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
                                   &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr);
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}
69 
70 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
71 {
72   PetscErrorCode               ierr;
73   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;
74 
75   PetscFunctionBegin;
76   switch (asType) {
77   case CG_AS_NONE:
78     ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr);
79     break;
80 
81   case CG_AS_BERTSEKAS:
82     ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr);
83     break;
84 
85   default:
86     break;
87   }
88   PetscFunctionReturn(0);
89 }
90 
/* Main solve routine for the bound-constrained nonlinear CG method.
   Projects the initial point onto the feasible region, evaluates the
   objective/gradient (unless a recycled evaluation is still valid), runs the
   initial convergence test on the Fischer-Burmeister residual, and then loops
   calling TaoBNCGConductIteration() until the convergence test sets a
   terminating reason. */
static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;
  PetscReal                    step=1.0,gnorm,gnorm2, resnorm;
  PetscInt                     nDiff;

  PetscFunctionBegin;
  /*   Project the current point onto the feasible set */
  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr);

  /* Project the initial point onto the feasible region */
  ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr);

  /* Re-evaluate f and g if the projection moved the point (nDiff > 0) or if we
     are not recycling a still-valid evaluation from a previous TaoSolve. */
  if (nDiff > 0 || !cg->recycle){
    ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr);
  }
  ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");

  /* Estimate the active set and compute the projected gradient */
  ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);

  /* Project the gradient and calculate the norm */
  ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
  ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  gnorm2 = gnorm*gnorm;

  /* Initialize counters */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  /* NOTE(review): resets starts at -1 — presumably so the unconditional initial
     reset below is not counted as a "scaled gradient step"; confirm intent. */
  cg->resets = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  /* Fischer-Burmeister residual measures bound-constrained optimality. */
  ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
  ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
  ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
  ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
  ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  /* Calculate initial direction. */
  if (!cg->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }
    /* ConductIteration performs the line search, CG update, and convergence
       test; it sets tao->reason when the solve should stop. */
    ierr = TaoBNCGConductIteration(tao, gnorm);CHKERRQ(ierr);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
}
153 
/* Allocate the work vectors and LMVM matrix storage needed by the solver.
   Each vector is duplicated from the solution/gradient only if it has not
   already been created (TaoSetUp may be called more than once).  The diagonal
   scaling work vectors are only needed when diag_scaling is enabled. */
static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG         *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!tao->gradient) {
    ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr);
  }
  if (!tao->stepdirection) {
    ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr);
  }
  if (!cg->W) {
    ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr);
  }
  if (!cg->work) {
    ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr);
  }
  if (!cg->sk) {
    ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr);
  }
  if (!cg->yk) {
    ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr);
  }
  if (!cg->X_old) {
    ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr);
  }
  if (!cg->G_old) {
    ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr);
  }
  /* NOTE(review): unlike the vectors above, these three are duplicated without
     a NULL guard — repeated TaoSetUp calls with diag_scaling enabled would
     presumably leak; confirm whether setup can run twice. */
  if (cg->diag_scaling){
    ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr);
    ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr);
    ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr);
  }
  if (!cg->unprojected_gradient) {
    ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr);
  }
  if (!cg->unprojected_gradient_old) {
    ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr);
  }
  /* Allocate the LMVM diagonal-Broyden matrix storage using sk/yk as templates. */
  ierr = MatLMVMAllocate(cg->B, cg->sk, cg->yk);CHKERRQ(ierr);
  if (cg->pc){
    ierr = MatLMVMSetJ0(cg->B, cg->pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
201 
202 static PetscErrorCode TaoDestroy_BNCG(Tao tao)
203 {
204   TAO_BNCG       *cg = (TAO_BNCG*) tao->data;
205   PetscErrorCode ierr;
206 
207   PetscFunctionBegin;
208   if (tao->setupcalled) {
209     ierr = VecDestroy(&cg->W);CHKERRQ(ierr);
210     ierr = VecDestroy(&cg->work);CHKERRQ(ierr);
211     ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr);
212     ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr);
213     ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr);
214     ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr);
215     ierr = VecDestroy(&cg->g_work);CHKERRQ(ierr);
216     ierr = VecDestroy(&cg->d_work);CHKERRQ(ierr);
217     ierr = VecDestroy(&cg->y_work);CHKERRQ(ierr);
218     ierr = VecDestroy(&cg->sk);CHKERRQ(ierr);
219     ierr = VecDestroy(&cg->yk);CHKERRQ(ierr);
220   }
221   ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr);
222   ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr);
223   ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr);
224   ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
225   ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
226   ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
227   ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
228   ierr = MatDestroy(&cg->B);CHKERRQ(ierr);
229   if (cg->pc) {
230     ierr = MatDestroy(&cg->pc);CHKERRQ(ierr);
231   }
232   ierr = PetscFree(tao->data);CHKERRQ(ierr);
233   PetscFunctionReturn(0);
234 }
235 
236 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao)
237 {
238     TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
239     PetscErrorCode ierr;
240 
241     PetscFunctionBegin;
242     ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr);
243     ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr);
244     ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
245     if (cg->cg_type != CG_SSML_BFGS){
246       cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
247     }
248     if (CG_GradientDescent == cg->cg_type){
249       cg->cg_type = CG_PCGradientDescent;
250       /* Set scaling equal to none or, at best, scalar scaling. */
251       cg->unscaled_restart = PETSC_TRUE;
252       cg->diag_scaling = PETSC_FALSE;
253     }
254     ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
255 
256     ierr = PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);CHKERRQ(ierr);
257     ierr = PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);CHKERRQ(ierr);
258     ierr = PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);CHKERRQ(ierr);
259     ierr = PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr);
260     ierr = PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);CHKERRQ(ierr);
261     ierr = PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);CHKERRQ(ierr);
262     ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr);
263     ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);CHKERRQ(ierr);
264     ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);CHKERRQ(ierr);
265     ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr);
266     ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr);
267     ierr = PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);CHKERRQ(ierr);
268     ierr = PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);CHKERRQ(ierr);
269     ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);CHKERRQ(ierr);
270     ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);CHKERRQ(ierr);
271     ierr = PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution, gradient, and diagonal scaling vector at the start of a new TaoSolve() call","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr);
272     ierr = PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr);
273     ierr = PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL);CHKERRQ(ierr);
274     if (cg->no_scaling){
275       cg->diag_scaling = PETSC_FALSE;
276       cg->alpha = -1.0;
277     }
278     if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling){ /* Some more default options that appear to be good. */
279       cg->neg_xi = PETSC_TRUE;
280     }
281     ierr = PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr);
282     ierr = PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr);
283     ierr = PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr);
284     ierr = PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL);CHKERRQ(ierr);
285     ierr = PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL);CHKERRQ(ierr);
286 
287    ierr = PetscOptionsTail();CHKERRQ(ierr);
288    ierr = MatSetFromOptions(cg->B);CHKERRQ(ierr);
289    PetscFunctionReturn(0);
290 }
291 
292 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
293 {
294   PetscBool      isascii;
295   TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
296   PetscErrorCode ierr;
297 
298   PetscFunctionBegin;
299   ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
300   if (isascii) {
301     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
302     ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr);
303     ierr = PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates);CHKERRQ(ierr);
304     ierr = PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets);CHKERRQ(ierr);
305     ierr = PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps);CHKERRQ(ierr);
306     ierr = PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error);CHKERRQ(ierr);
307     ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr);
308     if (cg->diag_scaling){
309       ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
310       if (isascii) {
311         ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
312         ierr = MatView(cg->B, viewer);CHKERRQ(ierr);
313         ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
314       }
315     }
316     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
322 {
323   PetscReal            a, b, c, sig1, sig2;
324 
325   PetscFunctionBegin;
326   *scale = 0.0;
327 
328   if (1.0 == alpha){
329     *scale = yts/yty;
330   } else if (0.0 == alpha){
331     *scale = sts/yts;
332   }
333   else if (-1.0 == alpha) *scale = 1.0;
334   else {
335     a = yty;
336     b = yts;
337     c = sts;
338     a *= alpha;
339     b *= -(2.0*alpha - 1.0);
340     c *= alpha - 1.0;
341     sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
342     sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
343     /* accept the positive root as the scalar */
344     if (sig1 > 0.0) {
345       *scale = sig1;
346     } else if (sig2 > 0.0) {
347       *scale = sig2;
348     } else {
349       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
350     }
351   }
352   PetscFunctionReturn(0);
353 }
354 
355 /*MC
356   TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.
357 
358   Options Database Keys:
359 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
360 . -tao_bncg_eta <r> - restart tolerance
361 . -tao_bncg_type <taocg_type> - cg formula
362 . -tao_bncg_as_type <none,bertsekas> - active set estimation method
363 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
364 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
365 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
366 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method
367 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
368 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
369 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
370 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
371 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
372 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
373 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
374 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
375 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
376 . -tao_bncg_zeta <r> - Scaling parameter in the KD method
377 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
378 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
379 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
380 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
381 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
382 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
383 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions
384 
385   Notes:
386     CG formulas are:
387 + "gd" - Gradient Descent
388 . "fr" - Fletcher-Reeves
389 . "pr" - Polak-Ribiere-Polyak
390 . "prp" - Polak-Ribiere-Plus
. "hs" - Hestenes-Stiefel
392 . "dy" - Dai-Yuan
393 . "ssml_bfgs" - Self-Scaling Memoryless BFGS
394 . "ssml_dfp"  - Self-Scaling Memoryless DFP
395 . "ssml_brdn" - Self-Scaling Memoryless Broyden
396 . "hz" - Hager-Zhang (CG_DESCENT 5.3)
397 . "dk" - Dai-Kou (2013)
398 - "kd" - Kou-Dai (2015)
399 
400   Level: beginner
401 M*/
402 
/* Constructor for the TAOBNCG solver type.
   Installs the ops table, creates the More-Thuente line search and the
   diagonal-Broyden LMVM scaling matrix, and fills the TAO_BNCG context with
   the default parameter values documented in the man page above. */
PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG       *cg;
  const char     *morethuente_type = TAOLINESEARCHMT;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  tao->ops->setup = TaoSetUp_BNCG;
  tao->ops->solve = TaoSolve_BNCG;
  tao->ops->view = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  if (!tao->max_it_changed) tao->max_it = 2000;
  if (!tao->max_funcs_changed) tao->max_funcs = 4000;

  /*  Note: nondefault values should be used for nonlinear conjugate gradient  */
  /*  method.  In particular, gtol should be less than 0.5; the value used in  */
  /*  Nocedal and Wright is 0.10.  We use the default values for the  */
  /*  linesearch because it seems to work better. */
  ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr);
  ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr);

  ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr);
  tao->data = (void*)cg;
  /* The diagonal-Broyden scaling matrix lives in the KSP/Mat packages. */
  ierr = KSPInitializePackage();CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(cg->B, "tao_bncg_");CHKERRQ(ierr);
  ierr = MatSetType(cg->B, MATLMVMDIAGBROYDEN);CHKERRQ(ierr);

  /* No user preconditioner until one is supplied. */
  cg->pc = NULL;

  /* Default tuning parameters; see the options table in the man page above. */
  cg->dk_eta = 0.5;
  cg->hz_eta = 0.4;
  cg->dynamic_restart = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling = PETSC_FALSE;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;
  cg->theta = 1.0;
  cg->hz_theta = 1.0;
  cg->dfp_scale = 1.0;
  cg->bfgs_scale = 1.0;
  cg->zeta = 0.1;
  cg->min_quad = 6;
  cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/
  cg->xi = 1.0;
  cg->neg_xi = PETSC_TRUE;
  cg->spaced_restart = PETSC_FALSE;
  cg->tol_quad = 1e-8;
  cg->as_step = 0.001;
  cg->as_tol = 0.001;
  cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/
  cg->as_type = CG_AS_BERTSEKAS;
  cg->cg_type = CG_SSML_BFGS;
  cg->recycle = PETSC_FALSE;
  cg->alpha = 1.0;
  cg->diag_scaling = PETSC_TRUE;
  PetscFunctionReturn(0);
}
468 
469 
/* Restart the CG iteration with a (scaled) steepest-descent direction.
   gnormsq is the squared norm of the current projected gradient.  The scaling
   factor 2*max(1,|f|)/max(gnormsq,eps_23) is clamped to [delta_min, delta_max],
   unless unscaled restarts were requested, in which case the raw negative
   gradient is used.  The diagonal LMVM scaling is also reset so stale
   curvature information does not survive the restart. */
PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
{
   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
   PetscErrorCode    ierr;
   PetscReal         scaling;

   PetscFunctionBegin;
   ++cg->resets;
   /* eps_23 floor guards against division by a vanishing gradient norm. */
   scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
   scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
   if (cg->unscaled_restart) {
     scaling = 1.0;
     ++cg->pure_gd_steps;
   }
   /* stepdirection = -scaling * gradient (the 0.0 beta overwrites it). */
   ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr);
   /* Also want to reset our diagonal scaling with each restart */
   if (cg->diag_scaling) {
     ierr = MatLMVMReset(cg->B, PETSC_FALSE);CHKERRQ(ierr);
   }
   PetscFunctionReturn(0);
 }
491 
/* Decide whether a dynamic restart is warranted, following the quadratic-
   interpolation test of Hager-Zhang/Kou-Dai: if the function has behaved like
   a quadratic along the last min_quad consecutive steps, signal a restart via
   *dynrestart.  gd and gd_old are the directional derivatives d'g at the new
   and old points, stepsize the accepted step, fold the previous objective. */
PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
   PetscReal         quadinterp;

   PetscFunctionBegin;
   /* NOTE(review): cg->min_quad is a PetscInt (default 6), so min_quad/10 is
      integer division and evaluates to 0 here — the guard effectively tests
      cg->f < 0.  Confirm whether a floating-point threshold was intended. */
   if (cg->f < cg->min_quad/10) {
     *dynrestart = PETSC_FALSE;
     PetscFunctionReturn(0);
   } /* just skip this since this strategy doesn't work well for functions near zero */
   /* quadinterp == 1 exactly when f is a quadratic along the step. */
   quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old));
   if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
   else {
     cg->iter_quad = 0;
     *dynrestart = PETSC_FALSE;
   }
   /* Enough consecutive quadratic-like steps: request a restart. */
   if (cg->iter_quad >= cg->min_quad) {
     cg->iter_quad = 0;
     *dynrestart = PETSC_TRUE;
   }
   PetscFunctionReturn(0);
 }
514 
515 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
516 {
517   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
518   PetscErrorCode    ierr;
519   PetscReal         gamma = 1.0, tau_k, beta;
520   PetscReal         tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd;
521   PetscReal         gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
522   PetscInt          dim;
523   PetscBool         cg_restart = PETSC_FALSE;
524   PetscFunctionBegin;
525 
526   /* Local curvature check to see if we need to restart */
527   if (tao->niter >= 1 || cg->recycle){
528     ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
529     ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
530     ynorm2 = ynorm*ynorm;
531     ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
532     if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON){
533       cg_restart = PETSC_TRUE;
534       ++cg->skipped_updates;
535     }
536     if (cg->spaced_restart) {
537       ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
538       if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE;
539     }
540   }
541   /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */
542   if (cg->spaced_restart){
543     ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
544     if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE;
545   }
546   /* Compute the diagonal scaling vector if applicable */
547   if (cg->diag_scaling) {
548     ierr = MatLMVMUpdate(cg->B, tao->solution, tao->gradient);CHKERRQ(ierr);
549   }
550 
551   /* A note on diagonal scaling (to be added to paper):
552    For the FR, PR, PRP, and DY methods, the diagonally scaled versions
553    must be derived as a preconditioned CG method rather than as
554    a Hessian initialization like in the Broyden methods. */
555 
556   /* In that case, one writes the objective function as
557    f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
558    Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
559    HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
560    same under preconditioning. Note that A is diagonal, such that A^T = A. */
561 
562   /* This yields questions like what the dot product d_k^T y_k
563    should look like. HZ mistakenly treats that as the same under
564    preconditioning, but that is not necessarily true. */
565 
566   /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
567    we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
568    yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
569    NOT the same if our preconditioning matrix is updated between iterations.
570    This same issue is found when considering dot products of the form g_{k+1}^T y_k. */
571 
572   /* Compute CG step direction */
573   if (cg_restart) {
574     ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
575   } else if (pcgd_fallback) {
576     /* Just like preconditioned CG */
577     ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
578     ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
579   } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
580     switch (cg->cg_type) {
581     case CG_PCGradientDescent:
582       if (!cg->diag_scaling){
583         if (!cg->no_scaling){
584         cg->sts = step*step*dnorm*dnorm;
585         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
586         } else {
587           tau_k = 1.0;
588           ++cg->pure_gd_steps;
589         }
590         ierr = VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);CHKERRQ(ierr);
591       } else {
592         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
593         ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
594       }
595       break;
596 
597     case CG_HestenesStiefel:
598       /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
599       if (!cg->diag_scaling){
600         cg->sts = step*step*dnorm*dnorm;
601         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
602         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
603         beta = tau_k*gkp1_yk/dk_yk;
604         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
605       } else {
606         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
607         ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
608         beta = gkp1_yk/dk_yk;
609         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
610       }
611       break;
612 
613     case CG_FletcherReeves:
614       ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
615       ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
616       ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
617       ynorm2 = ynorm*ynorm;
618       ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
619       if (!cg->diag_scaling){
620         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);CHKERRQ(ierr);
621         beta = tau_k*gnorm2/gnorm2_old;
622         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
623       } else {
624         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Before it's updated */
625         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
626         ierr = VecDot(tao->gradient, cg->g_work, &tmp);CHKERRQ(ierr);
627         beta = tmp/gnorm2_old;
628         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
629       }
630       break;
631 
632     case CG_PolakRibierePolyak:
633       snorm = step*dnorm;
634       if (!cg->diag_scaling){
635         ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
636         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
637         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
638         beta = tau_k*gkp1_yk/gnorm2_old;
639         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
640       } else {
641         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr);
642         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
643         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
644         beta = gkp1_yk/gnorm2_old;
645         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
646       }
647       break;
648 
649     case CG_PolakRibierePlus:
650       ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
651       ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
652       ynorm2 = ynorm*ynorm;
653       if (!cg->diag_scaling){
654         ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
655         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
656         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
657         beta = tau_k*gkp1_yk/gnorm2_old;
658         beta = PetscMax(beta, 0.0);
659         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
660       } else {
661         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Old gtDg */
662         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
663         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
664         beta = gkp1_yk/gnorm2_old;
665         beta = PetscMax(beta, 0.0);
666         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
667       }
668       break;
669 
670     case CG_DaiYuan:
671       /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
672          SIAM Journal on optimization 10, no. 1 (1999): 177-182. */
673       if (!cg->diag_scaling){
674         ierr = VecDot(tao->stepdirection, tao->gradient, &gd);CHKERRQ(ierr);
675         ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
676         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);CHKERRQ(ierr);
677         beta = tau_k*gnorm2/(gd - gd_old);
678         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
679       } else {
680         ierr = MatMult(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
681         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
682         ierr = VecDot(cg->g_work, tao->gradient, &gtDg);CHKERRQ(ierr);
683         ierr = VecDot(tao->stepdirection, cg->G_old, &gd_old);CHKERRQ(ierr);
684         ierr = VecDot(cg->d_work, cg->g_work, &dk_yk);CHKERRQ(ierr);
685         dk_yk = dk_yk - gd_old;
686         beta = gtDg/dk_yk;
687         ierr = VecScale(cg->d_work, beta);CHKERRQ(ierr);
688         ierr = VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);CHKERRQ(ierr);
689       }
690       break;
691 
692     case CG_HagerZhang:
693       /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
694          ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
695       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
696       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
697       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
698       snorm = dnorm*step;
699       cg->yts = step*dk_yk;
700       if (cg->use_dynamic_restart){
701         ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
702       }
703       if (cg->dynamic_restart){
704         ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
705       } else {
706         if (!cg->diag_scaling){
707           ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
708           ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
709           /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
710           tmp = gd/dk_yk;
711           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk));
712           /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
713           beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
714           /* d <- -t*g + beta*t*d */
715           ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
716         } else {
717           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
718           cg->yty = ynorm2;
719           cg->sts = snorm*snorm;
720           /* Apply the diagonal scaling to all my vectors */
721           ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
722           ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
723           ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
724           /* Construct the constant ytDgkp1 */
725           ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
726           /* Construct the constant for scaling Dkyk in the update */
727           tmp = gd/dk_yk;
728           ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
729           tau_k = -tau_k*gd/(dk_yk*dk_yk);
730           /* beta is the constant which adds the dk contribution */
731           beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */
732           /* From HZ2013, modified to account for diagonal scaling*/
733           ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
734           ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
735           beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
736           /* Do the update */
737           ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
738         }
739       }
740       break;
741 
742     case CG_DaiKou:
743       /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
744          SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
745       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
746       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
747       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
748       snorm = step*dnorm;
749       cg->yts = dk_yk*step;
750       if (!cg->diag_scaling){
751         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
752         ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
753         /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
754         tmp = gd/dk_yk;
755         beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk;
756         beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
757         /* d <- -t*g + beta*t*d */
758         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
759       } else {
760         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
761         cg->yty = ynorm2;
762         cg->sts = snorm*snorm;
763         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
764         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
765         ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
766         /* Construct the constant ytDgkp1 */
767         ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
768         ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
769         tau_k = tau_k*gd/(dk_yk*dk_yk);
770         tmp = gd/dk_yk;
771         /* beta is the constant which adds the dk contribution */
772         beta = gkp1_yk/dk_yk - step*tmp - tau_k;
773         /* Update this for the last term in beta */
774         ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
775         beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */
776         ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
777         ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
778         beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
779         /* Do the update */
780         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
781       }
782       break;
783 
784     case CG_KouDai:
785       /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
786          Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
787       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
788       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
789       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
790       snorm = step*dnorm;
791       cg->yts = dk_yk*step;
792       if (cg->use_dynamic_restart){
793         ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
794       }
795       if (cg->dynamic_restart){
796         ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
797       } else {
798         if (!cg->diag_scaling){
799           ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
800           ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
801           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
802           if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */
803           {
804             beta = cg->zeta*tau_k*gd/(dnorm*dnorm);
805             gamma = 0.0;
806           } else {
807             if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk;
808             /* This seems to be very effective when there's no tau_k scaling.
809                This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
810             else {
811               gamma = cg->xi*gd/dk_yk;
812             }
813           }
814           /* d <- -t*g + beta*t*d + t*tmp*yk */
815           ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
816         } else {
817           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
818           cg->yty = ynorm2;
819           cg->sts = snorm*snorm;
820           ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
821           ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
822           /* Construct the constant ytDgkp1 */
823           ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk);CHKERRQ(ierr);
824           /* Construct the constant for scaling Dkyk in the update */
825           gamma = gd/dk_yk;
826           /* tau_k = -ytDy/(ytd)^2 * gd */
827           ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
828           tau_k = tau_k*gd/(dk_yk*dk_yk);
829           /* beta is the constant which adds the d_k contribution */
830           beta = gkp1D_yk/dk_yk - step*gamma - tau_k;
831           /* Here is the requisite check */
832           ierr = VecDot(tao->stepdirection, cg->g_work, &tmp);CHKERRQ(ierr);
833           if (cg->neg_xi){
834             /* modified KD implementation */
835             if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk;
836             else {
837               gamma = cg->xi*gd/dk_yk;
838             }
839             if (beta < cg->zeta*tmp/(dnorm*dnorm)){
840               beta = cg->zeta*tmp/(dnorm*dnorm);
841               gamma = 0.0;
842             }
843           } else { /* original KD 2015 implementation */
844             if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
845               beta = cg->zeta*tmp/(dnorm*dnorm);
846               gamma = 0.0;
847             } else {
848               gamma = cg->xi*gd/dk_yk;
849             }
850           }
851           /* Do the update in two steps */
852           ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
853           ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work);CHKERRQ(ierr);
854         }
855       }
856       break;
857 
858     case CG_SSML_BFGS:
859       /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
860          Discussion Papers 269 (1977). */
861       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
862       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
863       snorm = step*dnorm;
864       cg->yts = dk_yk*step;
865       cg->yty = ynorm2;
866       cg->sts = snorm*snorm;
867       if (!cg->diag_scaling){
868         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
869         ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
870         tmp = gd/dk_yk;
871         beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp;
872         /* d <- -t*g + beta*t*d + t*tmp*yk */
873         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
874       } else {
875         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
876         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
877         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
878         /* compute scalar gamma */
879         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
880         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
881         gamma = gd/dk_yk;
882         /* Compute scalar beta */
883         beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
884         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
885         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
886       }
887       break;
888 
889     case CG_SSML_DFP:
890       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
891       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
892       snorm = step*dnorm;
893       cg->yts = dk_yk*step;
894       cg->yty = ynorm2;
895       cg->sts = snorm*snorm;
896       if (!cg->diag_scaling){
897         /* Instead of a regular convex combination, we will solve a quadratic formula. */
898         ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
899         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
900         tau_k = cg->dfp_scale*tau_k;
901         tmp = tau_k*gkp1_yk/cg->yty;
902         beta = -step*gd/dk_yk;
903         /* d <- -t*g + beta*d + tmp*yk */
904         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
905       } else {
906         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
907         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
908         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
909         /* compute scalar gamma */
910         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
911         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
912         gamma = (gkp1_yk/tmp);
913         /* Compute scalar beta */
914         beta = -step*gd/dk_yk;
915         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
916         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
917       }
918       break;
919 
920     case CG_SSML_BROYDEN:
921       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
922       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
923       snorm = step*dnorm;
924       cg->yts = step*dk_yk;
925       cg->yty = ynorm2;
926       cg->sts = snorm*snorm;
927       if (!cg->diag_scaling){
928         /* Instead of a regular convex combination, we will solve a quadratic formula. */
929         ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale);CHKERRQ(ierr);
930         ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale);CHKERRQ(ierr);
931         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
932         tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp;
933         /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
934            it should reproduce the tau_dfp scaling. Same with dfp_scale.   */
935         tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty;
936         beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
937         /* d <- -t*g + beta*d + tmp*yk */
938         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
939       } else {
940         /* We have diagonal scaling enabled */
941         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
942         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
943         /* compute scalar gamma */
944         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
945         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
946         gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp);
947         /* Compute scalar beta */
948         beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
949         /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
950         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
951       }
952       break;
953 
954     default:
955       break;
956 
957     }
958   }
959   PetscFunctionReturn(0);
960 }
961 
962 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
963 {
964   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
965   PetscErrorCode               ierr;
966   TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
967   PetscReal                    step=1.0,gnorm2,gd,dnorm=0.0;
968   PetscReal                    gnorm2_old,f_old,resnorm, gnorm_old;
969   PetscBool                    pcgd_fallback = PETSC_FALSE;
970 
971   PetscFunctionBegin;
972   /* We are now going to perform a line search along the direction. */
973   /* Store solution and gradient info before it changes */
974   ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr);
975   ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr);
976   ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr);
977 
978   gnorm_old = gnorm;
979   gnorm2_old = gnorm_old*gnorm_old;
980   f_old = cg->f;
981   /* Perform bounded line search. If we are recycling a solution from a previous */
982   /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
983   if (!(cg->recycle && 0 == tao->niter)){
984     /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
985     ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
986     ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
987     ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
988 
989     /*  Check linesearch failure */
990     if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
991       ++cg->ls_fails;
992       if (cg->cg_type == CG_GradientDescent){
993         /* Nothing left to do but fail out of the optimization */
994         step = 0.0;
995         tao->reason = TAO_DIVERGED_LS_FAILURE;
996       } else {
997         /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
998         ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr);
999         ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr);
1000         ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr);
1001         gnorm = gnorm_old;
1002         gnorm2 = gnorm2_old;
1003         cg->f = f_old;
1004 
1005         /* Fall back on preconditioned CG (so long as you're not already using it) */
1006         if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling){
1007           pcgd_fallback = PETSC_TRUE;
1008           ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
1009 
1010           ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1011           ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1012 
1013           ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
1014           ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
1015           ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
1016 
1017           pcgd_fallback = PETSC_FALSE;
1018           if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1019             /* Going to perform a regular gradient descent step. */
1020             ++cg->ls_fails;
1021             step = 0.0;
1022           }
1023         }
1024         /* Fall back on the scaled gradient step */
1025         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1026           ++cg->ls_fails;
1027           ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1028           ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1029           ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
1030           ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
1031           ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
1032         }
1033 
1034         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1035           /* Nothing left to do but fail out of the optimization */
1036           ++cg->ls_fails;
1037           step = 0.0;
1038           tao->reason = TAO_DIVERGED_LS_FAILURE;
1039         } else {
1040           /* One of the fallbacks worked. Set them both back equal to false. */
1041           pcgd_fallback = PETSC_FALSE;
1042         }
1043       }
1044     }
1045     /* Convergence test for line search failure */
1046     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1047 
1048     /* Standard convergence test */
1049     ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
1050     ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
1051     if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
1052     ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
1053     ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
1054     ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
1055     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1056   }
1057   /* Assert we have an updated step and we need at least one more iteration. */
1058   /* Calculate the next direction */
1059   /* Estimate the active set at the new solution */
1060   ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);
1061   /* Compute the projected gradient and its norm */
1062   ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
1063   ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
1064   ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
1065   gnorm2 = gnorm*gnorm;
1066 
1067   /* Calculate some quantities used in the StepDirectionUpdate. */
1068   ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
1069   /* Update the step direction. */
1070   ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
1071   ++tao->niter;
1072   ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1073 
1074   if (cg->cg_type != CG_GradientDescent) {
1075     /* Figure out which previously active variables became inactive this iteration */
1076     ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
1077     if (cg->inactive_idx && cg->inactive_old) {
1078       ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr);
1079     }
1080     /* Selectively reset the CG step those freshly inactive variables */
1081     if (cg->new_inactives) {
1082       ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
1083       ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
1084       ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr);
1085       ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr);
1086       ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
1087       ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
1088     }
1089     /* Verify that this is a descent direction */
1090     ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
1091     ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
1092     if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) {
1093       /* Not a descent direction, so we reset back to projected gradient descent */
1094       ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1095       ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1096       ++cg->descent_error;
1097     } else {
1098     }
1099   }
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
1104 {
1105   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
1106   PetscErrorCode               ierr;
1107 
1108   PetscFunctionBegin;
1109   ierr = PetscObjectReference((PetscObject)H0);CHKERRQ(ierr);
1110   cg->pc = H0;
1111   PetscFunctionReturn(0);
1112 }
1113