/* /petsc/src/tao/bound/impls/bncg/bncg.c (revision 7f489da91baee7cfbedb679c2d7b0bca5dd3dd5b) */
#include <petsctaolinesearch.h>
#include <../src/tao/bound/impls/bncg/bncg.h>
#include <petscksp.h>

#define CG_GradientDescent      0
#define CG_HestenesStiefel      1
#define CG_FletcherReeves       2
#define CG_PolakRibierePolyak   3
#define CG_PolakRibierePlus     4
#define CG_DaiYuan              5
#define CG_HagerZhang           6
#define CG_DaiKou               7
#define CG_KouDai               8
#define CG_SSML_BFGS            9
#define CG_SSML_DFP             10
#define CG_SSML_BROYDEN         11
#define CG_PCGradientDescent    12
#define CGTypes                 13

static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};

#define CG_AS_NONE       0
#define CG_AS_BERTSEKAS  1
#define CG_AS_SIZE       2

static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};

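/* Enables or disables recycling: when enabled, a subsequent TaoSolve() reuses the
   solution, gradient, and diagonal scaling left over from the previous solve instead
   of recomputing them (see also the -tao_bncg_recycle option). */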
PetscErrorCode TaoBNCGSetRecycleFlag(Tao tao, PetscBool recycle)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;

  PetscFunctionBegin;
  cg->recycle = recycle;
  PetscFunctionReturn(0);
}

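/* Estimates the active and inactive variable index sets at the current iterate,
   either directly from the bounds (CG_AS_NONE) or with Bertsekas-style projected
   gradient trial steps (CG_AS_BERTSEKAS). The previous inactive set is saved in
   cg->inactive_old so that newly freed variables can be detected later. */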
PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  PetscErrorCode               ierr;
  TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
  if (cg->inactive_idx) {
    ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr);
    ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr);
  }
  switch (asType) {
  case CG_AS_NONE:
    ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
    ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr);
    ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
    ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr);
    break;

  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr);
    ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr);
    ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
                                   &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr);
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}

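/* Zeroes out (CG_AS_NONE) or bound-projects (CG_AS_BERTSEKAS) the components of
   the step associated with actively bounded variables, so the search direction
   stays consistent with the estimated active set. */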
PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
{
  PetscErrorCode               ierr;
  TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  switch (asType) {
  case CG_AS_NONE:
    ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr);
    break;

  case CG_AS_BERTSEKAS:
    ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr);
    break;

  default:
    break;
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;
  PetscReal                    step=1.0,gnorm,gnorm2,resnorm;
  PetscInt                     nDiff;

  PetscFunctionBegin;
  /* Project the current point onto the feasible set */
  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr);

  /* Project the initial point onto the feasible region */
  ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr);

  if (nDiff > 0 || !cg->recycle){
    ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr);
  }
  ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");

  /* Estimate the active set and compute the projected gradient */
  ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);

  /* Project the gradient and calculate the norm */
  ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
  ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  gnorm2 = gnorm*gnorm;

  /* Initialize counters */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  cg->resets = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

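  /* The Fischer-Burmeister function vanishes exactly where the bound-constrained
     optimality conditions hold, so the norm of cg->W serves as the residual for
     the convergence test in place of the plain gradient norm. */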
  ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
  ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
  ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
  ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
  ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

  /* Calculate initial direction. */
  if (!cg->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter);CHKERRQ(ierr);
    }
    ierr = TaoBNCGConductIteration(tao, gnorm);CHKERRQ(ierr);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!tao->gradient) {
    ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr);
  }
  if (!tao->stepdirection) {
    ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr);
  }
  if (!cg->W) {
    ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr);
  }
  if (!cg->work) {
    ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr);
  }
  if (!cg->sk) {
    ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr);
  }
  if (!cg->yk) {
    ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr);
  }
  if (!cg->X_old) {
    ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr);
  }
  if (!cg->G_old) {
    ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr);
  }
  if (cg->diag_scaling){
    ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr);
    ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr);
    ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr);
  }
  if (!cg->unprojected_gradient) {
    ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr);
  }
  if (!cg->unprojected_gradient_old) {
    ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr);
  }
  ierr = MatLMVMAllocate(cg->B, cg->sk, cg->yk);CHKERRQ(ierr);
  if (cg->pc){
    ierr = MatLMVMSetJ0(cg->B, cg->pc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoDestroy_BNCG(Tao tao)
{
  TAO_BNCG       *cg = (TAO_BNCG*) tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (tao->setupcalled) {
    ierr = VecDestroy(&cg->W);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->g_work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->d_work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->y_work);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->sk);CHKERRQ(ierr);
    ierr = VecDestroy(&cg->yk);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
  ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
  ierr = MatDestroy(&cg->B);CHKERRQ(ierr);
  if (cg->pc) {
    ierr = MatDestroy(&cg->pc);CHKERRQ(ierr);
  }
  ierr = PetscFree(tao->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao)
{
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr);
  ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for bound-constrained optimization");CHKERRQ(ierr);
  ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
  if (cg->cg_type != CG_SSML_BFGS){
    cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
  }
  if (CG_GradientDescent == cg->cg_type){
    cg->cg_type = CG_PCGradientDescent;
    /* Set scaling equal to none or, at best, scalar scaling. */
    cg->unscaled_restart = PETSC_TRUE;
    cg->diag_scaling = PETSC_FALSE;
  }
  ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type,NULL);CHKERRQ(ierr);

  ierr = PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for dfp/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_recycle","enable recycling the existing solution, gradient, and diagonal scaling vector at the start of a new TaoSolve() call","",cg->recycle,&cg->recycle,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL);CHKERRQ(ierr);
  if (cg->no_scaling){
    cg->diag_scaling = PETSC_FALSE;
    cg->alpha = -1.0;
  }
  if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling){ /* Some more default options that appear to be good. */
    cg->neg_xi = PETSC_TRUE;
  }
  ierr = PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL);CHKERRQ(ierr);

  ierr = PetscOptionsTail();CHKERRQ(ierr);
  ierr = MatSetFromOptions(cg->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
{
  PetscBool      isascii;
  TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr);
    if (cg->diag_scaling){
      ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
      ierr = MatView(cg->B, viewer);CHKERRQ(ierr);
      ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
{
  PetscReal            a, b, c, sig1, sig2;

  PetscFunctionBegin;
  *scale = 0.0;

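  /* For general alpha, the scaling tau solves the quadratic
       alpha*yty*tau^2 - (2*alpha - 1)*yts*tau + (alpha - 1)*sts = 0,
     which interpolates between the BFGS scaling yts/yty (alpha = 1) and the
     DFP scaling sts/yts (alpha = 0); alpha = -1 disables scaling entirely. */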
  if (1.0 == alpha) {
    *scale = yts/yty;
  } else if (0.0 == alpha) {
    *scale = sts/yts;
  } else if (-1.0 == alpha) {
    *scale = 1.0;
  } else {
    a = yty;
    b = yts;
    c = sts;
    a *= alpha;
    b *= -(2.0*alpha - 1.0);
    c *= alpha - 1.0;
    sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
    sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
    /* accept the positive root as the scalar */
    if (sig1 > 0.0) {
      *scale = sig1;
    } else if (sig2 > 0.0) {
      *scale = sig2;
    } else {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
    }
  }
  PetscFunctionReturn(0);
}

/*MC
     TAOBNCG -   Bound-constrained Nonlinear Conjugate Gradient method.

   Options Database Keys:
+      -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
.      -tao_bncg_eta <r> - restart tolerance
.      -tao_bncg_type <taocg_type> - cg formula
.      -tao_bncg_as_type <none,bertsekas> - active set estimation method
.      -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
.      -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
.      -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
.      -tao_bncg_theta <r> - convex combination parameter for the Broyden method
.      -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
.      -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
.      -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
.      -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
.      -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
.      -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
.      -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
.      -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
.      -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
.      -tao_bncg_zeta <r> - Scaling parameter in the KD method
.      -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
.      -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
.      -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
.      -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
.      -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
.      -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
-      -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions

  Notes:
     CG formulas are:
         "gd" - Gradient Descent
         "fr" - Fletcher-Reeves
         "pr" - Polak-Ribiere-Polyak
         "prp" - Polak-Ribiere-Plus
         "hs" - Hestenes-Stiefel
         "dy" - Dai-Yuan
         "ssml_bfgs" - Self-Scaling Memoryless BFGS
         "ssml_dfp"  - Self-Scaling Memoryless DFP
         "ssml_brdn" - Self-Scaling Memoryless Broyden
         "hz" - Hager-Zhang (CG_DESCENT 5.3)
         "dk" - Dai-Kou (2013)
         "kd" - Kou-Dai (2015)
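
  Example usage (a minimal sketch; the vectors x, xl, xu, the FormFunctionGradient()
  callback, and the user context are assumed to be set up by the caller, and error
  checking is omitted for brevity):
.vb
  Tao tao;
  TaoCreate(PETSC_COMM_WORLD, &tao);
  TaoSetType(tao, TAOBNCG);
  TaoSetInitialVector(tao, x);
  TaoSetVariableBounds(tao, xl, xu);
  TaoSetObjectiveAndGradientRoutine(tao, FormFunctionGradient, (void*)&user);
  TaoSetFromOptions(tao);  /* pick a formula at runtime, e.g. -tao_bncg_type ssml_bfgs */
  TaoSolve(tao);
  TaoDestroy(&tao);
.ve
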
  Level: beginner
M*/

PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG       *cg;
  const char     *morethuente_type = TAOLINESEARCHMT;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  tao->ops->setup = TaoSetUp_BNCG;
  tao->ops->solve = TaoSolve_BNCG;
  tao->ops->view = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  if (!tao->max_it_changed) tao->max_it = 2000;
  if (!tao->max_funcs_changed) tao->max_funcs = 4000;

  /*  Note: nondefault values should be used for the nonlinear conjugate gradient  */
  /*  method.  In particular, gtol should be less than 0.5; the value used in  */
  /*  Nocedal and Wright is 0.10.  We use the default values for the  */
  /*  linesearch because it seems to work better. */
  ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr);
  ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr);

  ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr);
  tao->data = (void*)cg;
  ierr = KSPInitializePackage();CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(cg->B, "tao_bncg_");CHKERRQ(ierr);
  ierr = MatSetType(cg->B, MATLMVMDIAGBRDN);CHKERRQ(ierr);

  cg->pc = NULL;

  cg->dk_eta = 0.5;
  cg->hz_eta = 0.4;
  cg->dynamic_restart = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling = PETSC_FALSE;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;
  cg->theta = 1.0;
  cg->hz_theta = 1.0;
  cg->dfp_scale = 1.0;
  cg->bfgs_scale = 1.0;
  cg->zeta = 0.1;
  cg->min_quad = 6;
  cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015 */
  cg->xi = 1.0;
  cg->neg_xi = PETSC_TRUE;
  cg->spaced_restart = PETSC_FALSE;
  cg->tol_quad = 1e-8;
  cg->as_step = 0.001;
  cg->as_tol = 0.001;
  cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter */
  cg->as_type = CG_AS_BERTSEKAS;
  cg->cg_type = CG_SSML_BFGS;
  cg->recycle = PETSC_FALSE;
  cg->alpha = 1.0;
  cg->diag_scaling = PETSC_TRUE;
  PetscFunctionReturn(0);
}

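/* Resets the step direction to steepest descent, scaled by a bounded estimate of
   the inverse Hessian norm (or unscaled when -tao_bncg_unscaled_restart is set),
   and resets the diagonal Broyden scaling matrix if it is in use. */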
PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
{
  TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode    ierr;
  PetscReal         scaling;

  PetscFunctionBegin;
  ++cg->resets;
  scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
  scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
  if (cg->unscaled_restart) {
    scaling = 1.0;
    ++cg->pure_gd_steps;
  }
  ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr);
  /* Also want to reset our diagonal scaling with each restart */
  if (cg->diag_scaling) {
    ierr = MatLMVMReset(cg->B, PETSC_FALSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

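/* Flags a restart when the objective has behaved like a quadratic for at least
   min_quad consecutive iterations: quadinterp compares the actual change in f
   against the change predicted by quadratic interpolation from gd and gd_old,
   and is approximately 1 when the quadratic model fits. */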
PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
  TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
  PetscReal         quadinterp;

  PetscFunctionBegin;
  if (cg->f < cg->min_quad/10) {
    /* Just skip this since this strategy doesn't work well for functions near zero */
    *dynrestart = PETSC_FALSE;
    PetscFunctionReturn(0);
  }
  quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old));
  if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
  else {
    cg->iter_quad = 0;
    *dynrestart = PETSC_FALSE;
  }
  if (cg->iter_quad >= cg->min_quad) {
    cg->iter_quad = 0;
    *dynrestart = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

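/* Computes the next CG step direction for the selected cg_type. Scalar-scaled
   variants use TaoBNCGComputeScalarScaling(); diagonally scaled variants instead
   apply the inverse of the LMVM diagonal Broyden matrix cg->B. The restart tests
   (curvature, spaced, dynamic) fall back to TaoBNCGResetUpdate(). */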
PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
{
  TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode    ierr;
  PetscReal         gamma = 1.0, tau_k, beta;
  PetscReal         tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd;
  PetscReal         gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
  PetscInt          dim;
  PetscBool         cg_restart = PETSC_FALSE;

  PetscFunctionBegin;
  /* Local curvature check to see if we need to restart */
  if (tao->niter >= 1 || cg->recycle){
    ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
    ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
    ynorm2 = ynorm*ynorm;
    ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
    if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON){
      cg_restart = PETSC_TRUE;
      ++cg->skipped_updates;
    }
  }
  /* If the user wants regular restarts, do one every x*n iterations, where x = min_restart_num and n = dimension */
  if (cg->spaced_restart){
    ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
    if (0 == tao->niter % (cg->min_restart_num*dim)) cg_restart = PETSC_TRUE;
  }
  /* Compute the diagonal scaling vector if applicable */
  if (cg->diag_scaling) {
    ierr = MatLMVMUpdate(cg->B, tao->solution, tao->gradient);CHKERRQ(ierr);
  }

  /* A note on diagonal scaling (to be added to paper):
   For the FR, PR, PRP, and DY methods, the diagonally scaled versions
   must be derived as a preconditioned CG method rather than as
   a Hessian initialization like in the Broyden methods. */

  /* In that case, one writes the objective function as
   f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
   Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
   HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
   same under preconditioning. Note that A is diagonal, such that A^T = A. */

  /* This yields questions like what the dot product d_k^T y_k
   should look like. HZ mistakenly treats that as the same under
   preconditioning, but that is not necessarily true. */

  /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
   we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
   yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
   NOT the same if our preconditioning matrix is updated between iterations.
   This same issue is found when considering dot products of the form g_{k+1}^T y_k. */

  /* Compute CG step direction */
  if (cg_restart) {
    ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
  } else if (pcgd_fallback) {
    /* Just like preconditioned CG */
    ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
    ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
  } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
    switch (cg->cg_type) {
    case CG_PCGradientDescent:
      if (!cg->diag_scaling){
        if (!cg->no_scaling){
          cg->sts = step*step*dnorm*dnorm;
          ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        } else {
          tau_k = 1.0;
          ++cg->pure_gd_steps;
        }
        ierr = VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_HestenesStiefel:
      /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
      if (!cg->diag_scaling){
        cg->sts = step*step*dnorm*dnorm;
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gkp1_yk/dk_yk;
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
        beta = gkp1_yk/dk_yk;
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_FletcherReeves:
      ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
      ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
      ynorm2 = ynorm*ynorm;
      ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
      if (!cg->diag_scaling){
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gnorm2/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Before it's updated */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(tao->gradient, cg->g_work, &tmp);CHKERRQ(ierr);
        beta = tmp/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_PolakRibierePolyak:
      snorm = step*dnorm;
      if (!cg->diag_scaling){
        ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gkp1_yk/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        beta = gkp1_yk/gnorm2_old;
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_PolakRibierePlus:
      ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
      ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
      ynorm2 = ynorm*ynorm;
      if (!cg->diag_scaling){
        ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gkp1_yk/gnorm2_old;
        beta = PetscMax(beta, 0.0);
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Old gtDg */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        beta = gkp1_yk/gnorm2_old;
        beta = PetscMax(beta, 0.0);
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_DaiYuan:
      /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
         SIAM Journal on Optimization 10, no. 1 (1999): 177-182. */
      if (!cg->diag_scaling){
        ierr = VecDot(tao->stepdirection, tao->gradient, &gd);CHKERRQ(ierr);
        ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);CHKERRQ(ierr);
        beta = tau_k*gnorm2/(gd - gd_old);
        ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
      } else {
        ierr = MatMult(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = VecDot(cg->g_work, tao->gradient, &gtDg);CHKERRQ(ierr);
        ierr = VecDot(tao->stepdirection, cg->G_old, &gd_old);CHKERRQ(ierr);
        ierr = VecDot(cg->d_work, cg->g_work, &dk_yk);CHKERRQ(ierr);
        dk_yk = dk_yk - gd_old;
        beta = gtDg/dk_yk;
        ierr = VecScale(cg->d_work, beta);CHKERRQ(ierr);
        ierr = VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);CHKERRQ(ierr);
      }
      break;

    case CG_HagerZhang:
      /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
         ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = dnorm*step;
      cg->yts = step*dk_yk;
      if (cg->use_dynamic_restart){
        ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
      }
      if (cg->dynamic_restart){
        ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
      } else {
        if (!cg->diag_scaling){
          ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
          ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
          /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
          tmp = gd/dk_yk;
          beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk));
          /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
          beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
          /* d <- -t*g + beta*t*d */
          ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm*snorm;
          /* Apply the diagonal scaling to all my vectors */
          ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
          ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
          ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
          /* Construct the constant ytDgkp1 */
          ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
          /* Construct the constant for scaling Dkyk in the update */
          tmp = gd/dk_yk;
          ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
          tau_k = -tau_k*gd/(dk_yk*dk_yk);
          /* beta is the constant which adds the dk contribution */
          beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */
          /* From HZ2013, modified to account for diagonal scaling */
          ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
          ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
          beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
          /* Do the update */
          ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
        }
      }
      break;

    case CG_DaiKou:
      /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
         SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      if (!cg->diag_scaling){
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
        /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
        tmp = gd/dk_yk;
        beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk;
        beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
        /* d <- -t*g + beta*t*d */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        cg->yty = ynorm2;
        cg->sts = snorm*snorm;
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
        /* Construct the constant ytDgkp1 */
        ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
        tau_k = tau_k*gd/(dk_yk*dk_yk);
        tmp = gd/dk_yk;
        /* beta is the constant which adds the dk contribution */
        beta = gkp1_yk/dk_yk - step*tmp - tau_k;
        /* Update this for the last term in beta */
        ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
        beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */
        ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
        ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
        beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
        /* Do the update */
        ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
      }
      break;

    case CG_KouDai:
      /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
         Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      if (cg->use_dynamic_restart){
        ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
      }
      if (cg->dynamic_restart){
        ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
      } else {
        if (!cg->diag_scaling){
          ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
          ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
          beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
          if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)){ /* 0.1 is KD's zeta parameter */
            beta = cg->zeta*tau_k*gd/(dnorm*dnorm);
            gamma = 0.0;
          } else {
            if (gkp1_yk < 0 && cg->neg_xi) {
              /* This seems to be very effective when there's no tau_k scaling.
                 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
              gamma = -1.0*gd/dk_yk;
            } else {
              gamma = cg->xi*gd/dk_yk;
            }
          }
          /* d <- -t*g + beta*t*d + t*tmp*yk */
          ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm*snorm;
          ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
          ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
          /* Construct the constant ytDgkp1 */
          ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk);CHKERRQ(ierr);
          /* Construct the constant for scaling Dkyk in the update */
          gamma = gd/dk_yk;
          /* tau_k = -ytDy/(ytd)^2 * gd */
          ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
          tau_k = tau_k*gd/(dk_yk*dk_yk);
          /* beta is the constant which adds the d_k contribution */
          beta = gkp1D_yk/dk_yk - step*gamma - tau_k;
          /* Here is the requisite check */
          ierr = VecDot(tao->stepdirection, cg->g_work, &tmp);CHKERRQ(ierr);
          if (cg->neg_xi){
            /* modified KD implementation */
            if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk;
            else gamma = cg->xi*gd/dk_yk;
            if (beta < cg->zeta*tmp/(dnorm*dnorm)){
              beta = cg->zeta*tmp/(dnorm*dnorm);
              gamma = 0.0;
            }
          } else { /* original KD 2015 implementation */
            if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
              beta = cg->zeta*tmp/(dnorm*dnorm);
              gamma = 0.0;
            } else {
              gamma = cg->xi*gd/dk_yk;
            }
          }
          /* Do the update in two steps */
          ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
          ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work);CHKERRQ(ierr);
        }
      }
      break;

    case CG_SSML_BFGS:
      /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
         Discussion Papers 269 (1977). */
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      cg->yty = ynorm2;
      cg->sts = snorm*snorm;
      if (!cg->diag_scaling){
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        tmp = gd/dk_yk;
        beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp;
        /* d <- -t*g + beta*t*d + t*tmp*yk */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        /* compute scalar gamma */
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
        gamma = gd/dk_yk;
        /* Compute scalar beta */
        beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
      }
      break;

    case CG_SSML_DFP:
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = dk_yk*step;
      cg->yty = ynorm2;
      cg->sts = snorm*snorm;
      if (!cg->diag_scaling){
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        tau_k = cg->dfp_scale*tau_k;
        tmp = tau_k*gkp1_yk/cg->yty;
        beta = -step*gd/dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        /* compute scalar gamma */
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
        gamma = (gkp1_yk/tmp);
        /* Compute scalar beta */
        beta = -step*gd/dk_yk;
        /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
      }
      break;

    case CG_SSML_BROYDEN:
      ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
      ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
      snorm = step*dnorm;
      cg->yts = step*dk_yk;
      cg->yty = ynorm2;
      cg->sts = snorm*snorm;
      if (!cg->diag_scaling){
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale);CHKERRQ(ierr);
        ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale);CHKERRQ(ierr);
        ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
        tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp;
        /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
           it should reproduce the tau_dfp scaling. Same with dfp_scale. */
        tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty;
        beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
        /* d <- -t*g + beta*d + tmp*yk */
        ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
      } else {
        /* We have diagonal scaling enabled */
        ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
        ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
        /* compute scalar gamma */
        ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
        ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
        gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp);
        /* Compute scalar beta */
        beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
        /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
      }
      break;

    default:
      break;
    }
  }
  PetscFunctionReturn(0);
}

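/* Performs one BNCG iteration: a bounded line search along the current direction
   (with preconditioned and scaled gradient descent fallbacks on failure), the
   convergence test, and then the computation of the next step direction. */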
PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;
  TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
  PetscReal                    step=1.0,gnorm2,gd,dnorm=0.0;
  PetscReal                    gnorm2_old,f_old,resnorm,gnorm_old;
  PetscBool                    pcgd_fallback = PETSC_FALSE;

  PetscFunctionBegin;
  /* We are now going to perform a line search along the direction. */
  /* Store solution and gradient info before it changes */
  ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr);
  ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr);
  ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr);

  gnorm_old = gnorm;
  gnorm2_old = gnorm_old*gnorm_old;
  f_old = cg->f;
  /* Perform bounded line search. If we are recycling a solution from a previous */
  /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
  if (!(cg->recycle && 0 == tao->niter)){
    /* This block is executed every iteration, except for the first iteration of a recycled TaoSolve */
    ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
    ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
    ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);

    /* Check linesearch failure */
    if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
      ++cg->ls_fails;
      if (cg->cg_type == CG_GradientDescent){
        /* Nothing left to do but fail out of the optimization */
        step = 0.0;
        tao->reason = TAO_DIVERGED_LS_FAILURE;
      } else {
        /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
        ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr);
        ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr);
        ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr);
        gnorm = gnorm_old;
        gnorm2 = gnorm2_old;
        cg->f = f_old;

        /* Fall back on preconditioned CG (so long as you're not already using it) */
        if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling){
          pcgd_fallback = PETSC_TRUE;
          ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);

          ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
          ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);

          ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
          ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
          ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);

          pcgd_fallback = PETSC_FALSE;
          if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
            /* Going to perform a regular gradient descent step. */
            ++cg->ls_fails;
            step = 0.0;
          }
        }
        /* Fall back on the scaled gradient step */
        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
          ++cg->ls_fails;
          ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
          ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
          ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
          ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
          ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
        }

        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
          /* Nothing left to do but fail out of the optimization */
          ++cg->ls_fails;
          step = 0.0;
          tao->reason = TAO_DIVERGED_LS_FAILURE;
        } else {
          /* One of the fallbacks worked. Set them both back equal to false. */
          pcgd_fallback = PETSC_FALSE;
        }
      }
    }
    /* Convergence test for line search failure */
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

    /* Standard convergence test */
    ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
    ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
    if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
    ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
    ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
    ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
  /* We now have an updated step and need at least one more iteration. */
  /* Calculate the next direction */
  /* Estimate the active set at the new solution */
  ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);
  /* Compute the projected gradient and its norm */
  ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
  ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  gnorm2 = gnorm*gnorm;

  /* Calculate some quantities used in the StepDirectionUpdate. */
  ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
  /* Update the step direction. */
  ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
  ++tao->niter;
  ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);

  if (cg->cg_type != CG_GradientDescent) {
    /* Figure out which previously active variables became inactive this iteration */
    ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
    if (cg->inactive_idx && cg->inactive_old) {
      ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr);
    }
    /* Selectively reset the CG step for those freshly inactive variables */
    if (cg->new_inactives) {
      ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
      ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
      ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr);
      ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr);
      ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
      ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
    }
    /* Verify that this is a descent direction */
    ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
    ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
    if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) {
      /* Not a descent direction, so we reset back to projected gradient descent */
      ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
      ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
      ++cg->descent_error;
    }
  }
  PetscFunctionReturn(0);
}

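/* Sets a user-provided matrix as the initial Hessian approximation (J0) for the
   internal LMVM matrix; TaoSetUp_BNCG() passes it to MatLMVMSetJ0(). A reference
   to H0 is taken, so the caller retains ownership and should still destroy its own
   reference. A minimal sketch, assuming a user matrix myH0 built elsewhere:

     ierr = TaoBNCGSetH0(tao, myH0);CHKERRQ(ierr);
     ierr = MatDestroy(&myH0);CHKERRQ(ierr);
*/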
PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;

  PetscFunctionBegin;
  ierr = PetscObjectReference((PetscObject)H0);CHKERRQ(ierr);
  cg->pc = H0;
  PetscFunctionReturn(0);
}