xref: /petsc/src/tao/bound/impls/bncg/bncg.c (revision 5f80ce2ab25dff0f4601e710601cbbcecf323266)
1 #include <petsctaolinesearch.h>
2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/
3 #include <petscksp.h>
4 
5 #define CG_GradientDescent      0
6 #define CG_HestenesStiefel      1
7 #define CG_FletcherReeves       2
8 #define CG_PolakRibierePolyak   3
9 #define CG_PolakRibierePlus     4
10 #define CG_DaiYuan              5
11 #define CG_HagerZhang           6
12 #define CG_DaiKou               7
13 #define CG_KouDai               8
14 #define CG_SSML_BFGS            9
15 #define CG_SSML_DFP             10
16 #define CG_SSML_BROYDEN         11
17 #define CG_PCGradientDescent    12
18 #define CGTypes                 13
19 
20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};
21 
22 #define CG_AS_NONE       0
23 #define CG_AS_BERTSEKAS  1
24 #define CG_AS_SIZE       2
25 
26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};
27 
28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
29 {
30   PetscErrorCode               ierr;
31   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;
32 
33   PetscFunctionBegin;
34   CHKERRQ(ISDestroy(&cg->inactive_old));
35   if (cg->inactive_idx) {
36     CHKERRQ(ISDuplicate(cg->inactive_idx, &cg->inactive_old));
37     CHKERRQ(ISCopy(cg->inactive_idx, cg->inactive_old));
38   }
39   switch (asType) {
40   case CG_AS_NONE:
41     CHKERRQ(ISDestroy(&cg->inactive_idx));
42     CHKERRQ(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx));
43     CHKERRQ(ISDestroy(&cg->active_idx));
44     CHKERRQ(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx));
45     break;
46 
47   case CG_AS_BERTSEKAS:
48     /* Use gradient descent to estimate the active set */
49     CHKERRQ(VecCopy(cg->unprojected_gradient, cg->W));
50     CHKERRQ(VecScale(cg->W, -1.0));
51     ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
52                                    &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr);
53     break;
54 
55   default:
56     break;
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
62 {
63   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;
64 
65   PetscFunctionBegin;
66   switch (asType) {
67   case CG_AS_NONE:
68     CHKERRQ(VecISSet(step, cg->active_idx, 0.0));
69     break;
70 
71   case CG_AS_BERTSEKAS:
72     CHKERRQ(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step));
73     break;
74 
75   default:
76     break;
77   }
78   PetscFunctionReturn(0);
79 }
80 
81 static PetscErrorCode TaoSolve_BNCG(Tao tao)
82 {
83   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
84   PetscReal                    step=1.0,gnorm,gnorm2, resnorm;
85   PetscInt                     nDiff;
86 
87   PetscFunctionBegin;
88   /*   Project the current point onto the feasible set */
89   CHKERRQ(TaoComputeVariableBounds(tao));
90   CHKERRQ(TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU));
91 
92   /* Project the initial point onto the feasible region */
93   CHKERRQ(TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution));
94 
95   if (nDiff > 0 || !tao->recycle) {
96     CHKERRQ(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient));
97   }
98   CHKERRQ(VecNorm(cg->unprojected_gradient,NORM_2,&gnorm));
99   PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
100 
101   /* Estimate the active set and compute the projected gradient */
102   CHKERRQ(TaoBNCGEstimateActiveSet(tao, cg->as_type));
103 
104   /* Project the gradient and calculate the norm */
105   CHKERRQ(VecCopy(cg->unprojected_gradient, tao->gradient));
106   CHKERRQ(VecISSet(tao->gradient, cg->active_idx, 0.0));
107   CHKERRQ(VecNorm(tao->gradient,NORM_2,&gnorm));
108   gnorm2 = gnorm*gnorm;
109 
110   /* Initialize counters */
111   tao->niter = 0;
112   cg->ls_fails = cg->descent_error = 0;
113   cg->resets = -1;
114   cg->skipped_updates = cg->pure_gd_steps = 0;
115   cg->iter_quad = 0;
116 
117   /* Convergence test at the starting point. */
118   tao->reason = TAO_CONTINUE_ITERATING;
119 
120   CHKERRQ(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
121   CHKERRQ(VecNorm(cg->W, NORM_2, &resnorm));
122   PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
123   CHKERRQ(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
124   CHKERRQ(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
125   CHKERRQ((*tao->ops->convergencetest)(tao,tao->cnvP));
126   if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
127   /* Calculate initial direction. */
128   if (!tao->recycle) {
129     /* We are not recycling a solution/history from a past TaoSolve */
130     CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2));
131   }
132   /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
133   while (1) {
134     /* Call general purpose update function */
135     if (tao->ops->update) {
136       CHKERRQ((*tao->ops->update)(tao, tao->niter, tao->user_update));
137     }
138     CHKERRQ(TaoBNCGConductIteration(tao, gnorm));
139     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
140   }
141 }
142 
143 static PetscErrorCode TaoSetUp_BNCG(Tao tao)
144 {
145   TAO_BNCG         *cg = (TAO_BNCG*)tao->data;
146 
147   PetscFunctionBegin;
148   if (!tao->gradient) {
149     CHKERRQ(VecDuplicate(tao->solution,&tao->gradient));
150   }
151   if (!tao->stepdirection) {
152     CHKERRQ(VecDuplicate(tao->solution,&tao->stepdirection));
153   }
154   if (!cg->W) {
155     CHKERRQ(VecDuplicate(tao->solution,&cg->W));
156   }
157   if (!cg->work) {
158     CHKERRQ(VecDuplicate(tao->solution,&cg->work));
159   }
160   if (!cg->sk) {
161     CHKERRQ(VecDuplicate(tao->solution,&cg->sk));
162   }
163   if (!cg->yk) {
164     CHKERRQ(VecDuplicate(tao->gradient,&cg->yk));
165   }
166   if (!cg->X_old) {
167     CHKERRQ(VecDuplicate(tao->solution,&cg->X_old));
168   }
169   if (!cg->G_old) {
170     CHKERRQ(VecDuplicate(tao->gradient,&cg->G_old));
171   }
172   if (cg->diag_scaling) {
173     CHKERRQ(VecDuplicate(tao->solution,&cg->d_work));
174     CHKERRQ(VecDuplicate(tao->solution,&cg->y_work));
175     CHKERRQ(VecDuplicate(tao->solution,&cg->g_work));
176   }
177   if (!cg->unprojected_gradient) {
178     CHKERRQ(VecDuplicate(tao->gradient,&cg->unprojected_gradient));
179   }
180   if (!cg->unprojected_gradient_old) {
181     CHKERRQ(VecDuplicate(tao->gradient,&cg->unprojected_gradient_old));
182   }
183   CHKERRQ(MatLMVMAllocate(cg->B, cg->sk, cg->yk));
184   if (cg->pc) {
185     CHKERRQ(MatLMVMSetJ0(cg->B, cg->pc));
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 static PetscErrorCode TaoDestroy_BNCG(Tao tao)
191 {
192   TAO_BNCG       *cg = (TAO_BNCG*) tao->data;
193 
194   PetscFunctionBegin;
195   if (tao->setupcalled) {
196     CHKERRQ(VecDestroy(&cg->W));
197     CHKERRQ(VecDestroy(&cg->work));
198     CHKERRQ(VecDestroy(&cg->X_old));
199     CHKERRQ(VecDestroy(&cg->G_old));
200     CHKERRQ(VecDestroy(&cg->unprojected_gradient));
201     CHKERRQ(VecDestroy(&cg->unprojected_gradient_old));
202     CHKERRQ(VecDestroy(&cg->g_work));
203     CHKERRQ(VecDestroy(&cg->d_work));
204     CHKERRQ(VecDestroy(&cg->y_work));
205     CHKERRQ(VecDestroy(&cg->sk));
206     CHKERRQ(VecDestroy(&cg->yk));
207   }
208   CHKERRQ(ISDestroy(&cg->active_lower));
209   CHKERRQ(ISDestroy(&cg->active_upper));
210   CHKERRQ(ISDestroy(&cg->active_fixed));
211   CHKERRQ(ISDestroy(&cg->active_idx));
212   CHKERRQ(ISDestroy(&cg->inactive_idx));
213   CHKERRQ(ISDestroy(&cg->inactive_old));
214   CHKERRQ(ISDestroy(&cg->new_inactives));
215   CHKERRQ(MatDestroy(&cg->B));
216   if (cg->pc) {
217     CHKERRQ(MatDestroy(&cg->pc));
218   }
219   CHKERRQ(PetscFree(tao->data));
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao)
224 {
225   TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
226 
227   PetscFunctionBegin;
228   CHKERRQ(PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization"));
229   CHKERRQ(PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL));
230   if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
231   if (CG_GradientDescent == cg->cg_type) {
232     cg->cg_type = CG_PCGradientDescent;
233     /* Set scaling equal to none or, at best, scalar scaling. */
234     cg->unscaled_restart = PETSC_TRUE;
235     cg->diag_scaling = PETSC_FALSE;
236   }
237   CHKERRQ(PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL));
238   CHKERRQ(PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL));
239   CHKERRQ(PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL));
240   CHKERRQ(PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL));
241   CHKERRQ(PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL));
242   CHKERRQ(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL));
243   CHKERRQ(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL));
244   CHKERRQ(PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL));
245   CHKERRQ(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL));
246   CHKERRQ(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL));
247   CHKERRQ(PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL));
248   CHKERRQ(PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL));
249   CHKERRQ(PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL));
250   CHKERRQ(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL));
251   CHKERRQ(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL));
252   CHKERRQ(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL));
253   CHKERRQ(PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL));
254   CHKERRQ(PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL));
255   if (cg->no_scaling) {
256     cg->diag_scaling = PETSC_FALSE;
257     cg->alpha = -1.0;
258   }
259   if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */
260     cg->neg_xi = PETSC_TRUE;
261   }
262   CHKERRQ(PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL));
263   CHKERRQ(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL));
264   CHKERRQ(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL));
265   CHKERRQ(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL));
266   CHKERRQ(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL));
267 
268   CHKERRQ(PetscOptionsTail());
269   CHKERRQ(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix));
270   CHKERRQ(MatAppendOptionsPrefix(cg->B, "tao_bncg_"));
271   CHKERRQ(MatSetFromOptions(cg->B));
272   PetscFunctionReturn(0);
273 }
274 
275 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
276 {
277   PetscBool      isascii;
278   TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
279 
280   PetscFunctionBegin;
281   CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
282   if (isascii) {
283     CHKERRQ(PetscViewerASCIIPushTab(viewer));
284     CHKERRQ(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]));
285     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates));
286     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets));
287     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps));
288     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error));
289     CHKERRQ(PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails));
290     if (cg->diag_scaling) {
291       CHKERRQ(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii));
292       if (isascii) {
293         CHKERRQ(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
294         CHKERRQ(MatView(cg->B, viewer));
295         CHKERRQ(PetscViewerPopFormat(viewer));
296       }
297     }
298     CHKERRQ(PetscViewerASCIIPopTab(viewer));
299   }
300   PetscFunctionReturn(0);
301 }
302 
303 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
304 {
305   PetscReal a, b, c, sig1, sig2;
306 
307   PetscFunctionBegin;
308   *scale = 0.0;
309   if (1.0 == alpha) *scale = yts/yty;
310   else if (0.0 == alpha) *scale = sts/yts;
311   else if (-1.0 == alpha) *scale = 1.0;
312   else {
313     a = yty;
314     b = yts;
315     c = sts;
316     a *= alpha;
317     b *= -(2.0*alpha - 1.0);
318     c *= alpha - 1.0;
319     sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
320     sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
321     /* accept the positive root as the scalar */
322     if (sig1 > 0.0) *scale = sig1;
323     else if (sig2 > 0.0) *scale = sig2;
324     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
325   }
326   PetscFunctionReturn(0);
327 }
328 
329 /*MC
330   TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.
331 
332   Options Database Keys:
333 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
334 . -tao_bncg_eta <r> - restart tolerance
335 . -tao_bncg_type <taocg_type> - cg formula
336 . -tao_bncg_as_type <none,bertsekas> - active set estimation method
337 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
338 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
339 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
340 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method
341 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
342 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
343 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
344 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
345 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
346 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
347 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
348 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
349 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
350 . -tao_bncg_zeta <r> - Scaling parameter in the KD method
351 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
352 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
353 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
354 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
355 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
356 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
357 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions
358 
359   Notes:
360     CG formulas are:
361 + "gd" - Gradient Descent
362 . "fr" - Fletcher-Reeves
363 . "pr" - Polak-Ribiere-Polyak
364 . "prp" - Polak-Ribiere-Plus
365 . "hs" - Hestenes-Steifel
366 . "dy" - Dai-Yuan
367 . "ssml_bfgs" - Self-Scaling Memoryless BFGS
368 . "ssml_dfp"  - Self-Scaling Memoryless DFP
369 . "ssml_brdn" - Self-Scaling Memoryless Broyden
370 . "hz" - Hager-Zhang (CG_DESCENT 5.3)
371 . "dk" - Dai-Kou (2013)
372 - "kd" - Kou-Dai (2015)
373 
374   Level: beginner
375 M*/
376 
377 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
378 {
379   TAO_BNCG       *cg;
380   const char     *morethuente_type = TAOLINESEARCHMT;
381 
382   PetscFunctionBegin;
383   tao->ops->setup = TaoSetUp_BNCG;
384   tao->ops->solve = TaoSolve_BNCG;
385   tao->ops->view = TaoView_BNCG;
386   tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
387   tao->ops->destroy = TaoDestroy_BNCG;
388 
389   /* Override default settings (unless already changed) */
390   if (!tao->max_it_changed) tao->max_it = 2000;
391   if (!tao->max_funcs_changed) tao->max_funcs = 4000;
392 
393   /*  Note: nondefault values should be used for nonlinear conjugate gradient  */
394   /*  method.  In particular, gtol should be less that 0.5; the value used in  */
395   /*  Nocedal and Wright is 0.10.  We use the default values for the  */
396   /*  linesearch because it seems to work better. */
397   CHKERRQ(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch));
398   CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1));
399   CHKERRQ(TaoLineSearchSetType(tao->linesearch, morethuente_type));
400   CHKERRQ(TaoLineSearchUseTaoRoutines(tao->linesearch, tao));
401 
402   CHKERRQ(PetscNewLog(tao,&cg));
403   tao->data = (void*)cg;
404   CHKERRQ(KSPInitializePackage());
405   CHKERRQ(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B));
406   CHKERRQ(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1));
407   CHKERRQ(MatSetType(cg->B, MATLMVMDIAGBROYDEN));
408 
409   cg->pc = NULL;
410 
411   cg->dk_eta = 0.5;
412   cg->hz_eta = 0.4;
413   cg->dynamic_restart = PETSC_FALSE;
414   cg->unscaled_restart = PETSC_FALSE;
415   cg->no_scaling = PETSC_FALSE;
416   cg->delta_min = 1e-7;
417   cg->delta_max = 100;
418   cg->theta = 1.0;
419   cg->hz_theta = 1.0;
420   cg->dfp_scale = 1.0;
421   cg->bfgs_scale = 1.0;
422   cg->zeta = 0.1;
423   cg->min_quad = 6;
424   cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/
425   cg->xi = 1.0;
426   cg->neg_xi = PETSC_TRUE;
427   cg->spaced_restart = PETSC_FALSE;
428   cg->tol_quad = 1e-8;
429   cg->as_step = 0.001;
430   cg->as_tol = 0.001;
431   cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/
432   cg->as_type = CG_AS_BERTSEKAS;
433   cg->cg_type = CG_SSML_BFGS;
434   cg->alpha = 1.0;
435   cg->diag_scaling = PETSC_TRUE;
436   PetscFunctionReturn(0);
437 }
438 
439 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
440 {
441    TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
442    PetscReal         scaling;
443 
444    PetscFunctionBegin;
445    ++cg->resets;
446    scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
447    scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
448    if (cg->unscaled_restart) {
449      scaling = 1.0;
450      ++cg->pure_gd_steps;
451    }
452    CHKERRQ(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient));
453    /* Also want to reset our diagonal scaling with each restart */
454    if (cg->diag_scaling) {
455      CHKERRQ(MatLMVMReset(cg->B, PETSC_FALSE));
456    }
457    PetscFunctionReturn(0);
458  }
459 
460 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
461 {
462    TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
463    PetscReal         quadinterp;
464 
465    PetscFunctionBegin;
466    if (cg->f < cg->min_quad/10) {
467      *dynrestart = PETSC_FALSE;
468      PetscFunctionReturn(0);
469    } /* just skip this since this strategy doesn't work well for functions near zero */
470    quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old));
471    if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
472    else {
473      cg->iter_quad = 0;
474      *dynrestart = PETSC_FALSE;
475    }
476    if (cg->iter_quad >= cg->min_quad) {
477      cg->iter_quad = 0;
478      *dynrestart = PETSC_TRUE;
479    }
480    PetscFunctionReturn(0);
481  }
482 
483 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
484 {
485   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
486   PetscReal         gamma = 1.0, tau_k, beta;
487   PetscReal         tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd;
488   PetscReal         gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
489   PetscInt          dim;
490   PetscBool         cg_restart = PETSC_FALSE;
491   PetscFunctionBegin;
492 
493   /* Local curvature check to see if we need to restart */
494   if (tao->niter >= 1 || tao->recycle) {
495     CHKERRQ(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
496     CHKERRQ(VecNorm(cg->yk, NORM_2, &ynorm));
497     ynorm2 = ynorm*ynorm;
498     CHKERRQ(VecDot(cg->yk, tao->stepdirection, &dk_yk));
499     if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON) {
500       cg_restart = PETSC_TRUE;
501       ++cg->skipped_updates;
502     }
503     if (cg->spaced_restart) {
504       CHKERRQ(VecGetSize(tao->gradient, &dim));
505       if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE;
506     }
507   }
508   /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */
509   if (cg->spaced_restart) {
510     CHKERRQ(VecGetSize(tao->gradient, &dim));
511     if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE;
512   }
513   /* Compute the diagonal scaling vector if applicable */
514   if (cg->diag_scaling) {
515     CHKERRQ(MatLMVMUpdate(cg->B, tao->solution, tao->gradient));
516   }
517 
518   /* A note on diagonal scaling (to be added to paper):
519    For the FR, PR, PRP, and DY methods, the diagonally scaled versions
520    must be derived as a preconditioned CG method rather than as
521    a Hessian initialization like in the Broyden methods. */
522 
523   /* In that case, one writes the objective function as
524    f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
525    Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
526    HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
527    same under preconditioning. Note that A is diagonal, such that A^T = A. */
528 
529   /* This yields questions like what the dot product d_k^T y_k
530    should look like. HZ mistakenly treats that as the same under
531    preconditioning, but that is not necessarily true. */
532 
533   /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
534    we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
535    yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
536    NOT the same if our preconditioning matrix is updated between iterations.
537    This same issue is found when considering dot products of the form g_{k+1}^T y_k. */
538 
539   /* Compute CG step direction */
540   if (cg_restart) {
541     CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2));
542   } else if (pcgd_fallback) {
543     /* Just like preconditioned CG */
544     CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
545     CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
546   } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
547     switch (cg->cg_type) {
548     case CG_PCGradientDescent:
549       if (!cg->diag_scaling) {
550         if (!cg->no_scaling) {
551         cg->sts = step*step*dnorm*dnorm;
552         CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha));
553         } else {
554           tau_k = 1.0;
555           ++cg->pure_gd_steps;
556         }
557         CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient));
558       } else {
559         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
560         CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
561       }
562       break;
563 
564     case CG_HestenesStiefel:
565       /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
566       if (!cg->diag_scaling) {
567         cg->sts = step*step*dnorm*dnorm;
568         CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
569         CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha));
570         beta = tau_k*gkp1_yk/dk_yk;
571         CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
572       } else {
573         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
574         CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1_yk));
575         beta = gkp1_yk/dk_yk;
576         CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
577       }
578       break;
579 
580     case CG_FletcherReeves:
581       CHKERRQ(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
582       CHKERRQ(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
583       CHKERRQ(VecNorm(cg->yk, NORM_2, &ynorm));
584       ynorm2 = ynorm*ynorm;
585       CHKERRQ(VecDot(cg->yk, tao->stepdirection, &dk_yk));
586       if (!cg->diag_scaling) {
587         CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha));
588         beta = tau_k*gnorm2/gnorm2_old;
589         CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
590       } else {
591         CHKERRQ(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */
592         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
593         CHKERRQ(VecDot(tao->gradient, cg->g_work, &tmp));
594         beta = tmp/gnorm2_old;
595         CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
596       }
597       break;
598 
599     case CG_PolakRibierePolyak:
600       snorm = step*dnorm;
601       if (!cg->diag_scaling) {
602         CHKERRQ(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
603         CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
604         CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha));
605         beta = tau_k*gkp1_yk/gnorm2_old;
606         CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
607       } else {
608         CHKERRQ(VecDot(cg->G_old, cg->g_work, &gnorm2_old));
609         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
610         CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk));
611         beta = gkp1_yk/gnorm2_old;
612         CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
613       }
614       break;
615 
616     case CG_PolakRibierePlus:
617       CHKERRQ(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
618       CHKERRQ(VecNorm(cg->yk, NORM_2, &ynorm));
619       ynorm2 = ynorm*ynorm;
620       if (!cg->diag_scaling) {
621         CHKERRQ(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
622         CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
623         CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha));
624         beta = tau_k*gkp1_yk/gnorm2_old;
625         beta = PetscMax(beta, 0.0);
626         CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
627       } else {
628         CHKERRQ(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */
629         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
630         CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk));
631         beta = gkp1_yk/gnorm2_old;
632         beta = PetscMax(beta, 0.0);
633         CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
634       }
635       break;
636 
637     case CG_DaiYuan:
638       /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
639          SIAM Journal on optimization 10, no. 1 (1999): 177-182. */
640       if (!cg->diag_scaling) {
641         CHKERRQ(VecDot(tao->stepdirection, tao->gradient, &gd));
642         CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old));
643         CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha));
644         beta = tau_k*gnorm2/(gd - gd_old);
645         CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
646       } else {
647         CHKERRQ(MatMult(cg->B, tao->stepdirection, cg->d_work));
648         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
649         CHKERRQ(VecDot(cg->g_work, tao->gradient, &gtDg));
650         CHKERRQ(VecDot(tao->stepdirection, cg->G_old, &gd_old));
651         CHKERRQ(VecDot(cg->d_work, cg->g_work, &dk_yk));
652         dk_yk = dk_yk - gd_old;
653         beta = gtDg/dk_yk;
654         CHKERRQ(VecScale(cg->d_work, beta));
655         CHKERRQ(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work));
656       }
657       break;
658 
659     case CG_HagerZhang:
660       /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
661          ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
662       CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd));
663       CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old));
664       CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
665       snorm = dnorm*step;
666       cg->yts = step*dk_yk;
667       if (cg->use_dynamic_restart) {
668         CHKERRQ(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
669       }
670       if (cg->dynamic_restart) {
671         CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2));
672       } else {
673         if (!cg->diag_scaling) {
674           CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
675           CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha));
676           /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
677           tmp = gd/dk_yk;
678           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk));
679           /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
680           beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
681           /* d <- -t*g + beta*t*d */
682           CHKERRQ(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
683         } else {
684           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
685           cg->yty = ynorm2;
686           cg->sts = snorm*snorm;
687           /* Apply the diagonal scaling to all my vectors */
688           CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
689           CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work));
690           CHKERRQ(MatSolve(cg->B, tao->stepdirection, cg->d_work));
691           /* Construct the constant ytDgkp1 */
692           CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1_yk));
693           /* Construct the constant for scaling Dkyk in the update */
694           tmp = gd/dk_yk;
695           CHKERRQ(VecDot(cg->yk, cg->y_work, &tau_k));
696           tau_k = -tau_k*gd/(dk_yk*dk_yk);
697           /* beta is the constant which adds the dk contribution */
698           beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */
699           /* From HZ2013, modified to account for diagonal scaling*/
700           CHKERRQ(VecDot(cg->G_old, cg->d_work, &gd_old));
701           CHKERRQ(VecDot(tao->stepdirection, cg->g_work, &gd));
702           beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
703           /* Do the update */
704           CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
705         }
706       }
707       break;
708 
709     case CG_DaiKou:
710       /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
711          SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
712       CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd));
713       CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old));
714       CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
715       snorm = step*dnorm;
716       cg->yts = dk_yk*step;
717       if (!cg->diag_scaling) {
718         CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
719         CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha));
720         /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
721         tmp = gd/dk_yk;
722         beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk;
723         beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
724         /* d <- -t*g + beta*t*d */
725         CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk));
726       } else {
727         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
728         cg->yty = ynorm2;
729         cg->sts = snorm*snorm;
730         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
731         CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work));
732         CHKERRQ(MatSolve(cg->B, tao->stepdirection, cg->d_work));
733         /* Construct the constant ytDgkp1 */
734         CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1_yk));
735         CHKERRQ(VecDot(cg->yk, cg->y_work, &tau_k));
736         tau_k = tau_k*gd/(dk_yk*dk_yk);
737         tmp = gd/dk_yk;
738         /* beta is the constant which adds the dk contribution */
739         beta = gkp1_yk/dk_yk - step*tmp - tau_k;
740         /* Update this for the last term in beta */
741         CHKERRQ(VecDot(cg->y_work, tao->stepdirection, &dk_yk));
742         beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */
743         CHKERRQ(VecDot(tao->stepdirection, cg->g_work, &gd));
744         CHKERRQ(VecDot(cg->G_old, cg->d_work, &gd_old));
745         beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
746         /* Do the update */
747         CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
748       }
749       break;
750 
751     case CG_KouDai:
752       /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
753          Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
754       CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd));
755       CHKERRQ(VecDot(cg->G_old, tao->stepdirection, &gd_old));
756       CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
757       snorm = step*dnorm;
758       cg->yts = dk_yk*step;
759       if (cg->use_dynamic_restart) {
760         CHKERRQ(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
761       }
762       if (cg->dynamic_restart) {
763         CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2));
764       } else {
765         if (!cg->diag_scaling) {
766           CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
767           CHKERRQ(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha));
768           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
769           if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */
770           {
771             beta = cg->zeta*tau_k*gd/(dnorm*dnorm);
772             gamma = 0.0;
773           } else {
774             if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk;
775             /* This seems to be very effective when there's no tau_k scaling.
776                This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
777             else {
778               gamma = cg->xi*gd/dk_yk;
779             }
780           }
781           /* d <- -t*g + beta*t*d + t*tmp*yk */
782           CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk));
783         } else {
784           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
785           cg->yty = ynorm2;
786           cg->sts = snorm*snorm;
787           CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
788           CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work));
789           /* Construct the constant ytDgkp1 */
790           CHKERRQ(VecDot(cg->yk, cg->g_work, &gkp1D_yk));
791           /* Construct the constant for scaling Dkyk in the update */
792           gamma = gd/dk_yk;
793           /* tau_k = -ytDy/(ytd)^2 * gd */
794           CHKERRQ(VecDot(cg->yk, cg->y_work, &tau_k));
795           tau_k = tau_k*gd/(dk_yk*dk_yk);
796           /* beta is the constant which adds the d_k contribution */
797           beta = gkp1D_yk/dk_yk - step*gamma - tau_k;
798           /* Here is the requisite check */
799           CHKERRQ(VecDot(tao->stepdirection, cg->g_work, &tmp));
800           if (cg->neg_xi) {
801             /* modified KD implementation */
802             if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk;
803             else {
804               gamma = cg->xi*gd/dk_yk;
805             }
806             if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
807               beta = cg->zeta*tmp/(dnorm*dnorm);
808               gamma = 0.0;
809             }
810           } else { /* original KD 2015 implementation */
811             if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
812               beta = cg->zeta*tmp/(dnorm*dnorm);
813               gamma = 0.0;
814             } else {
815               gamma = cg->xi*gd/dk_yk;
816             }
817           }
818           /* Do the update in two steps */
819           CHKERRQ(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
820           CHKERRQ(VecAXPY(tao->stepdirection, gamma, cg->y_work));
821         }
822       }
823       break;
824 
825     case CG_SSML_BFGS:
826       /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
827          Discussion Papers 269 (1977). */
828       CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd));
829       CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
830       snorm = step*dnorm;
831       cg->yts = dk_yk*step;
832       cg->yty = ynorm2;
833       cg->sts = snorm*snorm;
834       if (!cg->diag_scaling) {
835         CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
836         CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
837         tmp = gd/dk_yk;
838         beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp;
839         /* d <- -t*g + beta*t*d + t*tmp*yk */
840         CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk));
841       } else {
842         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
843         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
844         CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work));
845         /* compute scalar gamma */
846         CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk));
847         CHKERRQ(VecDot(cg->y_work, cg->yk, &tmp));
848         gamma = gd/dk_yk;
849         /* Compute scalar beta */
850         beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
851         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
852         CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
853       }
854       break;
855 
856     case CG_SSML_DFP:
857       CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd));
858       CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
859       snorm = step*dnorm;
860       cg->yts = dk_yk*step;
861       cg->yty = ynorm2;
862       cg->sts = snorm*snorm;
863       if (!cg->diag_scaling) {
864         /* Instead of a regular convex combination, we will solve a quadratic formula. */
865         CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
866         CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
867         tau_k = cg->dfp_scale*tau_k;
868         tmp = tau_k*gkp1_yk/cg->yty;
869         beta = -step*gd/dk_yk;
870         /* d <- -t*g + beta*d + tmp*yk */
871         CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
872       } else {
873         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
874         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
875         CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work));
876         /* compute scalar gamma */
877         CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk));
878         CHKERRQ(VecDot(cg->y_work, cg->yk, &tmp));
879         gamma = (gkp1_yk/tmp);
880         /* Compute scalar beta */
881         beta = -step*gd/dk_yk;
882         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
883         CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
884       }
885       break;
886 
887     case CG_SSML_BROYDEN:
888       CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd));
889       CHKERRQ(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
890       snorm = step*dnorm;
891       cg->yts = step*dk_yk;
892       cg->yty = ynorm2;
893       cg->sts = snorm*snorm;
894       if (!cg->diag_scaling) {
895         /* Instead of a regular convex combination, we will solve a quadratic formula. */
896         CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale));
897         CHKERRQ(TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale));
898         CHKERRQ(VecDot(cg->yk, tao->gradient, &gkp1_yk));
899         tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp;
900         /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
901            it should reproduce the tau_dfp scaling. Same with dfp_scale.   */
902         tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty;
903         beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
904         /* d <- -t*g + beta*d + tmp*yk */
905         CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
906       } else {
907         /* We have diagonal scaling enabled */
908         CHKERRQ(MatSolve(cg->B, tao->gradient, cg->g_work));
909         CHKERRQ(MatSolve(cg->B, cg->yk, cg->y_work));
910         /* compute scalar gamma */
911         CHKERRQ(VecDot(cg->g_work, cg->yk, &gkp1_yk));
912         CHKERRQ(VecDot(cg->y_work, cg->yk, &tmp));
913         gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp);
914         /* Compute scalar beta */
915         beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
916         /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
917         CHKERRQ(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
918       }
919       break;
920 
921     default:
922       break;
923 
924     }
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
930 {
931   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
932   TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
933   PetscReal                    step=1.0,gnorm2,gd,dnorm=0.0;
934   PetscReal                    gnorm2_old,f_old,resnorm, gnorm_old;
935   PetscBool                    pcgd_fallback = PETSC_FALSE;
936 
937   PetscFunctionBegin;
938   /* We are now going to perform a line search along the direction. */
939   /* Store solution and gradient info before it changes */
940   CHKERRQ(VecCopy(tao->solution, cg->X_old));
941   CHKERRQ(VecCopy(tao->gradient, cg->G_old));
942   CHKERRQ(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old));
943 
944   gnorm_old = gnorm;
945   gnorm2_old = gnorm_old*gnorm_old;
946   f_old = cg->f;
947   /* Perform bounded line search. If we are recycling a solution from a previous */
948   /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
949   if (!(tao->recycle && 0 == tao->niter)) {
950     /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
951     CHKERRQ(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
952     CHKERRQ(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
953     CHKERRQ(TaoAddLineSearchCounts(tao));
954 
955     /*  Check linesearch failure */
956     if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
957       ++cg->ls_fails;
958       if (cg->cg_type == CG_GradientDescent) {
959         /* Nothing left to do but fail out of the optimization */
960         step = 0.0;
961         tao->reason = TAO_DIVERGED_LS_FAILURE;
962       } else {
963         /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
964         CHKERRQ(VecCopy(cg->X_old, tao->solution));
965         CHKERRQ(VecCopy(cg->G_old, tao->gradient));
966         CHKERRQ(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient));
967         gnorm = gnorm_old;
968         gnorm2 = gnorm2_old;
969         cg->f = f_old;
970 
971         /* Fall back on preconditioned CG (so long as you're not already using it) */
972         if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) {
973           pcgd_fallback = PETSC_TRUE;
974           CHKERRQ(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
975 
976           CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2));
977           CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
978 
979           CHKERRQ(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
980           CHKERRQ(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
981           CHKERRQ(TaoAddLineSearchCounts(tao));
982 
983           pcgd_fallback = PETSC_FALSE;
984           if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
985             /* Going to perform a regular gradient descent step. */
986             ++cg->ls_fails;
987             step = 0.0;
988           }
989         }
990         /* Fall back on the scaled gradient step */
991         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
992           ++cg->ls_fails;
993           CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2));
994           CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
995           CHKERRQ(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
996           CHKERRQ(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
997           CHKERRQ(TaoAddLineSearchCounts(tao));
998         }
999 
1000         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
1001           /* Nothing left to do but fail out of the optimization */
1002           ++cg->ls_fails;
1003           step = 0.0;
1004           tao->reason = TAO_DIVERGED_LS_FAILURE;
1005         } else {
1006           /* One of the fallbacks worked. Set them both back equal to false. */
1007           pcgd_fallback = PETSC_FALSE;
1008         }
1009       }
1010     }
1011     /* Convergence test for line search failure */
1012     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1013 
1014     /* Standard convergence test */
1015     CHKERRQ(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
1016     CHKERRQ(VecNorm(cg->W, NORM_2, &resnorm));
1017     PetscCheck(!PetscIsInfOrNanReal(resnorm),PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
1018     CHKERRQ(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
1019     CHKERRQ(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
1020     CHKERRQ((*tao->ops->convergencetest)(tao,tao->cnvP));
1021     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1022   }
1023   /* Assert we have an updated step and we need at least one more iteration. */
1024   /* Calculate the next direction */
1025   /* Estimate the active set at the new solution */
1026   CHKERRQ(TaoBNCGEstimateActiveSet(tao, cg->as_type));
1027   /* Compute the projected gradient and its norm */
1028   CHKERRQ(VecCopy(cg->unprojected_gradient, tao->gradient));
1029   CHKERRQ(VecISSet(tao->gradient, cg->active_idx, 0.0));
1030   CHKERRQ(VecNorm(tao->gradient,NORM_2,&gnorm));
1031   gnorm2 = gnorm*gnorm;
1032 
1033   /* Calculate some quantities used in the StepDirectionUpdate. */
1034   CHKERRQ(VecNorm(tao->stepdirection, NORM_2, &dnorm));
1035   /* Update the step direction. */
1036   CHKERRQ(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
1037   ++tao->niter;
1038   CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
1039 
1040   if (cg->cg_type != CG_GradientDescent) {
1041     /* Figure out which previously active variables became inactive this iteration */
1042     CHKERRQ(ISDestroy(&cg->new_inactives));
1043     if (cg->inactive_idx && cg->inactive_old) {
1044       CHKERRQ(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives));
1045     }
1046     /* Selectively reset the CG step those freshly inactive variables */
1047     if (cg->new_inactives) {
1048       CHKERRQ(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
1049       CHKERRQ(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
1050       CHKERRQ(VecCopy(cg->inactive_grad, cg->inactive_step));
1051       CHKERRQ(VecScale(cg->inactive_step, -1.0));
1052       CHKERRQ(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
1053       CHKERRQ(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
1054     }
1055     /* Verify that this is a descent direction */
1056     CHKERRQ(VecDot(tao->gradient, tao->stepdirection, &gd));
1057     CHKERRQ(VecNorm(tao->stepdirection, NORM_2, &dnorm));
1058     if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) {
1059       /* Not a descent direction, so we reset back to projected gradient descent */
1060       CHKERRQ(TaoBNCGResetUpdate(tao, gnorm2));
1061       CHKERRQ(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
1062       ++cg->descent_error;
1063     } else {
1064     }
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
1070 {
1071   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
1072 
1073   PetscFunctionBegin;
1074   CHKERRQ(PetscObjectReference((PetscObject)H0));
1075   cg->pc = H0;
1076   PetscFunctionReturn(0);
1077 }
1078