xref: /petsc/src/tao/bound/impls/bncg/bncg.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 #include <petsctaolinesearch.h>
2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/
3 #include <petscksp.h>
4 
/* Enumeration of the CG update formulas supported by TAOBNCG.
   The integer values index into CG_Table below, so the two lists
   must stay in the same order. */
#define CG_GradientDescent      0
#define CG_HestenesStiefel      1
#define CG_FletcherReeves       2
#define CG_PolakRibierePolyak   3
#define CG_PolakRibierePlus     4
#define CG_DaiYuan              5
#define CG_HagerZhang           6
#define CG_DaiKou               7
#define CG_KouDai               8
#define CG_SSML_BFGS            9
#define CG_SSML_DFP             10
#define CG_SSML_BROYDEN         11
#define CG_PCGradientDescent    12
#define CGTypes                 13   /* number of valid entries above */

/* Command-line names for the CG formulas, indexed by the CG_* values. */
static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"};

/* Active-set estimation strategies (see TaoBNCGEstimateActiveSet). */
#define CG_AS_NONE       0
#define CG_AS_BERTSEKAS  1
#define CG_AS_SIZE       2   /* number of valid entries above */

/* Command-line names for the active-set strategies, indexed by CG_AS_*. */
static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};
27 
28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
29 {
30   PetscErrorCode               ierr;
31   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;
32 
33   PetscFunctionBegin;
34   ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
35   if (cg->inactive_idx) {
36     ierr = ISDuplicate(cg->inactive_idx, &cg->inactive_old);CHKERRQ(ierr);
37     ierr = ISCopy(cg->inactive_idx, cg->inactive_old);CHKERRQ(ierr);
38   }
39   switch (asType) {
40   case CG_AS_NONE:
41     ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
42     ierr = VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx);CHKERRQ(ierr);
43     ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
44     ierr = ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx);CHKERRQ(ierr);
45     break;
46 
47   case CG_AS_BERTSEKAS:
48     /* Use gradient descent to estimate the active set */
49     ierr = VecCopy(cg->unprojected_gradient, cg->W);CHKERRQ(ierr);
50     ierr = VecScale(cg->W, -1.0);CHKERRQ(ierr);
51     ierr = TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol,
52                                    &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx);CHKERRQ(ierr);
53     break;
54 
55   default:
56     break;
57   }
58   PetscFunctionReturn(0);
59 }
60 
61 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
62 {
63   PetscErrorCode               ierr;
64   TAO_BNCG                     *cg = (TAO_BNCG *)tao->data;
65 
66   PetscFunctionBegin;
67   switch (asType) {
68   case CG_AS_NONE:
69     ierr = VecISSet(step, cg->active_idx, 0.0);CHKERRQ(ierr);
70     break;
71 
72   case CG_AS_BERTSEKAS:
73     ierr = TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step);CHKERRQ(ierr);
74     break;
75 
76   default:
77     break;
78   }
79   PetscFunctionReturn(0);
80 }
81 
/* Main solve routine for TAOBNCG.

   Projects the initial point onto the bound constraints, evaluates the
   objective/gradient (unless a recycled solution already has them),
   performs the initial convergence test, and then iterates via
   TaoBNCGConductIteration until a termination reason is set. */
static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
  PetscErrorCode               ierr;
  PetscReal                    step=1.0,gnorm,gnorm2, resnorm;
  PetscInt                     nDiff;

  PetscFunctionBegin;
  /*   Project the current point onto the feasible set */
  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr);

  /* Project the initial point onto the feasible region */
  ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr);

  /* Recompute f/g only when the projection moved the point or we are not
     recycling a previous solve's values */
  if (nDiff > 0 || !tao->recycle){
    ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient);CHKERRQ(ierr);
  }
  ierr = VecNorm(cg->unprojected_gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(cg->f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");

  /* Estimate the active set and compute the projected gradient */
  ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);

  /* Project the gradient and calculate the norm */
  ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
  ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  gnorm2 = gnorm*gnorm;

  /* Initialize counters */
  tao->niter = 0;
  cg->ls_fails = cg->descent_error = 0;
  /* resets starts at -1 because the unconditional first reset below (or in
     the first iteration) should not count as a scaled-gradient restart */
  cg->resets = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad = 0;

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  /* Fischer-Burmeister residual measures bound-constrained optimality */
  ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
  ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
  ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
  ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
  ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  /* Calculate initial direction. */
  if (!tao->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }
    /* One full CG iteration; sets tao->reason when finished */
    ierr = TaoBNCGConductIteration(tao, gnorm);CHKERRQ(ierr);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
  }
}
144 
145 static PetscErrorCode TaoSetUp_BNCG(Tao tao)
146 {
147   TAO_BNCG         *cg = (TAO_BNCG*)tao->data;
148   PetscErrorCode ierr;
149 
150   PetscFunctionBegin;
151   if (!tao->gradient) {
152     ierr = VecDuplicate(tao->solution,&tao->gradient);CHKERRQ(ierr);
153   }
154   if (!tao->stepdirection) {
155     ierr = VecDuplicate(tao->solution,&tao->stepdirection);CHKERRQ(ierr);
156   }
157   if (!cg->W) {
158     ierr = VecDuplicate(tao->solution,&cg->W);CHKERRQ(ierr);
159   }
160   if (!cg->work) {
161     ierr = VecDuplicate(tao->solution,&cg->work);CHKERRQ(ierr);
162   }
163   if (!cg->sk) {
164     ierr = VecDuplicate(tao->solution,&cg->sk);CHKERRQ(ierr);
165   }
166   if (!cg->yk) {
167     ierr = VecDuplicate(tao->gradient,&cg->yk);CHKERRQ(ierr);
168   }
169   if (!cg->X_old) {
170     ierr = VecDuplicate(tao->solution,&cg->X_old);CHKERRQ(ierr);
171   }
172   if (!cg->G_old) {
173     ierr = VecDuplicate(tao->gradient,&cg->G_old);CHKERRQ(ierr);
174   }
175   if (cg->diag_scaling){
176     ierr = VecDuplicate(tao->solution,&cg->d_work);CHKERRQ(ierr);
177     ierr = VecDuplicate(tao->solution,&cg->y_work);CHKERRQ(ierr);
178     ierr = VecDuplicate(tao->solution,&cg->g_work);CHKERRQ(ierr);
179   }
180   if (!cg->unprojected_gradient) {
181     ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient);CHKERRQ(ierr);
182   }
183   if (!cg->unprojected_gradient_old) {
184     ierr = VecDuplicate(tao->gradient,&cg->unprojected_gradient_old);CHKERRQ(ierr);
185   }
186   ierr = MatLMVMAllocate(cg->B, cg->sk, cg->yk);CHKERRQ(ierr);
187   if (cg->pc){
188     ierr = MatLMVMSetJ0(cg->B, cg->pc);CHKERRQ(ierr);
189   }
190   PetscFunctionReturn(0);
191 }
192 
193 static PetscErrorCode TaoDestroy_BNCG(Tao tao)
194 {
195   TAO_BNCG       *cg = (TAO_BNCG*) tao->data;
196   PetscErrorCode ierr;
197 
198   PetscFunctionBegin;
199   if (tao->setupcalled) {
200     ierr = VecDestroy(&cg->W);CHKERRQ(ierr);
201     ierr = VecDestroy(&cg->work);CHKERRQ(ierr);
202     ierr = VecDestroy(&cg->X_old);CHKERRQ(ierr);
203     ierr = VecDestroy(&cg->G_old);CHKERRQ(ierr);
204     ierr = VecDestroy(&cg->unprojected_gradient);CHKERRQ(ierr);
205     ierr = VecDestroy(&cg->unprojected_gradient_old);CHKERRQ(ierr);
206     ierr = VecDestroy(&cg->g_work);CHKERRQ(ierr);
207     ierr = VecDestroy(&cg->d_work);CHKERRQ(ierr);
208     ierr = VecDestroy(&cg->y_work);CHKERRQ(ierr);
209     ierr = VecDestroy(&cg->sk);CHKERRQ(ierr);
210     ierr = VecDestroy(&cg->yk);CHKERRQ(ierr);
211   }
212   ierr = ISDestroy(&cg->active_lower);CHKERRQ(ierr);
213   ierr = ISDestroy(&cg->active_upper);CHKERRQ(ierr);
214   ierr = ISDestroy(&cg->active_fixed);CHKERRQ(ierr);
215   ierr = ISDestroy(&cg->active_idx);CHKERRQ(ierr);
216   ierr = ISDestroy(&cg->inactive_idx);CHKERRQ(ierr);
217   ierr = ISDestroy(&cg->inactive_old);CHKERRQ(ierr);
218   ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
219   ierr = MatDestroy(&cg->B);CHKERRQ(ierr);
220   if (cg->pc) {
221     ierr = MatDestroy(&cg->pc);CHKERRQ(ierr);
222   }
223   ierr = PetscFree(tao->data);CHKERRQ(ierr);
224   PetscFunctionReturn(0);
225 }
226 
227 static PetscErrorCode TaoSetFromOptions_BNCG(PetscOptionItems *PetscOptionsObject,Tao tao)
228 {
229     TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
230     PetscErrorCode ierr;
231 
232     PetscFunctionBegin;
233     ierr = TaoLineSearchSetFromOptions(tao->linesearch);CHKERRQ(ierr);
234     ierr = PetscOptionsHead(PetscOptionsObject,"Nonlinear Conjugate Gradient method for unconstrained optimization");CHKERRQ(ierr);
235     ierr = PetscOptionsEList("-tao_bncg_type","cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
236     if (cg->cg_type != CG_SSML_BFGS){
237       cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */
238     }
239     if (CG_GradientDescent == cg->cg_type){
240       cg->cg_type = CG_PCGradientDescent;
241       /* Set scaling equal to none or, at best, scalar scaling. */
242       cg->unscaled_restart = PETSC_TRUE;
243       cg->diag_scaling = PETSC_FALSE;
244     }
245     ierr = PetscOptionsEList("-tao_bncg_as_type","active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type,NULL);CHKERRQ(ierr);
246 
247     ierr = PetscOptionsReal("-tao_bncg_hz_eta","(developer) cutoff tolerance for HZ", "", cg->hz_eta,&cg->hz_eta,NULL);CHKERRQ(ierr);
248     ierr = PetscOptionsReal("-tao_bncg_eps","(developer) cutoff value for restarts", "", cg->epsilon,&cg->epsilon,NULL);CHKERRQ(ierr);
249     ierr = PetscOptionsReal("-tao_bncg_dk_eta","(developer) cutoff tolerance for DK", "", cg->dk_eta,&cg->dk_eta,NULL);CHKERRQ(ierr);
250     ierr = PetscOptionsReal("-tao_bncg_xi","(developer) Parameter in the KD method", "", cg->xi,&cg->xi,NULL);CHKERRQ(ierr);
251     ierr = PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL);CHKERRQ(ierr);
252     ierr = PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL);CHKERRQ(ierr);
253     ierr = PetscOptionsReal("-tao_bncg_alpha","(developer) parameter for the scalar scaling","",cg->alpha,&cg->alpha,NULL);CHKERRQ(ierr);
254     ierr = PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL);CHKERRQ(ierr);
255     ierr = PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL);CHKERRQ(ierr);
256     ierr = PetscOptionsBool("-tao_bncg_diag_scaling","Enable diagonal Broyden-like preconditioning","",cg->diag_scaling,&cg->diag_scaling,NULL);CHKERRQ(ierr);
257     ierr = PetscOptionsBool("-tao_bncg_dynamic_restart","(developer) use dynamic restarts as in HZ, DK, KD","",cg->use_dynamic_restart,&cg->use_dynamic_restart,NULL);CHKERRQ(ierr);
258     ierr = PetscOptionsBool("-tao_bncg_unscaled_restart","(developer) use unscaled gradient restarts","",cg->unscaled_restart,&cg->unscaled_restart,NULL);CHKERRQ(ierr);
259     ierr = PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL);CHKERRQ(ierr);
260     ierr = PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL);CHKERRQ(ierr);
261     ierr = PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL);CHKERRQ(ierr);
262     ierr = PetscOptionsBool("-tao_bncg_spaced_restart","(developer) Enable regular steepest descent restarting every fixed number of iterations","",cg->spaced_restart,&cg->spaced_restart,NULL);CHKERRQ(ierr);
263     ierr = PetscOptionsBool("-tao_bncg_no_scaling","Disable all scaling except in restarts","",cg->no_scaling,&cg->no_scaling,NULL);CHKERRQ(ierr);
264     if (cg->no_scaling){
265       cg->diag_scaling = PETSC_FALSE;
266       cg->alpha = -1.0;
267     }
268     if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling){ /* Some more default options that appear to be good. */
269       cg->neg_xi = PETSC_TRUE;
270     }
271     ierr = PetscOptionsBool("-tao_bncg_neg_xi","(developer) Use negative xi when it might be a smaller descent direction than necessary","",cg->neg_xi,&cg->neg_xi,NULL);CHKERRQ(ierr);
272     ierr = PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables","",cg->as_tol,&cg->as_tol,NULL);CHKERRQ(ierr);
273     ierr = PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables","",cg->as_step,&cg->as_step,NULL);CHKERRQ(ierr);
274     ierr = PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts","",cg->delta_min,&cg->delta_min,NULL);CHKERRQ(ierr);
275     ierr = PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts","",cg->delta_max,&cg->delta_max,NULL);CHKERRQ(ierr);
276 
277    ierr = PetscOptionsTail();CHKERRQ(ierr);
278    ierr = MatSetFromOptions(cg->B);CHKERRQ(ierr);
279    PetscFunctionReturn(0);
280 }
281 
282 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
283 {
284   PetscBool      isascii;
285   TAO_BNCG       *cg = (TAO_BNCG*)tao->data;
286   PetscErrorCode ierr;
287 
288   PetscFunctionBegin;
289   ierr = PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii);CHKERRQ(ierr);
290   if (isascii) {
291     ierr = PetscViewerASCIIPushTab(viewer);CHKERRQ(ierr);
292     ierr = PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type]);CHKERRQ(ierr);
293     ierr = PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %i\n", cg->skipped_updates);CHKERRQ(ierr);
294     ierr = PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %i\n", cg->resets);CHKERRQ(ierr);
295     ierr = PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %i\n", cg->pure_gd_steps);CHKERRQ(ierr);
296     ierr = PetscViewerASCIIPrintf(viewer, "Not a descent direction: %i\n", cg->descent_error);CHKERRQ(ierr);
297     ierr = PetscViewerASCIIPrintf(viewer, "Line search fails: %i\n", cg->ls_fails);CHKERRQ(ierr);
298     if (cg->diag_scaling){
299       ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);CHKERRQ(ierr);
300       if (isascii) {
301         ierr = PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
302         ierr = MatView(cg->B, viewer);CHKERRQ(ierr);
303         ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr);
304       }
305     }
306     ierr = PetscViewerASCIIPopTab(viewer);CHKERRQ(ierr);
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
312 {
313   PetscReal            a, b, c, sig1, sig2;
314 
315   PetscFunctionBegin;
316   *scale = 0.0;
317 
318   if (1.0 == alpha){
319     *scale = yts/yty;
320   } else if (0.0 == alpha){
321     *scale = sts/yts;
322   }
323   else if (-1.0 == alpha) *scale = 1.0;
324   else {
325     a = yty;
326     b = yts;
327     c = sts;
328     a *= alpha;
329     b *= -(2.0*alpha - 1.0);
330     c *= alpha - 1.0;
331     sig1 = (-b + PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
332     sig2 = (-b - PetscSqrtReal(b*b - 4.0*a*c))/(2.0*a);
333     /* accept the positive root as the scalar */
334     if (sig1 > 0.0) {
335       *scale = sig1;
336     } else if (sig2 > 0.0) {
337       *scale = sig2;
338     } else {
339       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
340     }
341   }
342   PetscFunctionReturn(0);
343 }
344 
345 /*MC
346   TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.
347 
348   Options Database Keys:
349 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
350 . -tao_bncg_eta <r> - restart tolerance
351 . -tao_bncg_type <taocg_type> - cg formula
352 . -tao_bncg_as_type <none,bertsekas> - active set estimation method
353 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
354 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
355 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision.
356 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method
357 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
358 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
359 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method
360 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method
361 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method
362 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method
363 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True.
364 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods
365 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
366 . -tao_bncg_zeta <r> - Scaling parameter in the KD method
367 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps
368 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps
369 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart
370 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem
371 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
372 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults.
373 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions
374 
375   Notes:
376     CG formulas are:
377 + "gd" - Gradient Descent
378 . "fr" - Fletcher-Reeves
379 . "pr" - Polak-Ribiere-Polyak
380 . "prp" - Polak-Ribiere-Plus
. "hs" - Hestenes-Stiefel
382 . "dy" - Dai-Yuan
383 . "ssml_bfgs" - Self-Scaling Memoryless BFGS
384 . "ssml_dfp"  - Self-Scaling Memoryless DFP
385 . "ssml_brdn" - Self-Scaling Memoryless Broyden
386 . "hz" - Hager-Zhang (CG_DESCENT 5.3)
387 . "dk" - Dai-Kou (2013)
388 - "kd" - Kou-Dai (2015)
389 
390   Level: beginner
391 M*/
392 
/* Constructor for the TAOBNCG solver object.

   Installs the method callbacks, creates the More-Thuente line search and
   the diagonal-Broyden LMVM matrix used for preconditioning, and sets the
   default values for every tunable parameter (overridable later through
   TaoSetFromOptions_BNCG). */
PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao)
{
  TAO_BNCG       *cg;
  const char     *morethuente_type = TAOLINESEARCHMT;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  tao->ops->setup = TaoSetUp_BNCG;
  tao->ops->solve = TaoSolve_BNCG;
  tao->ops->view = TaoView_BNCG;
  tao->ops->setfromoptions = TaoSetFromOptions_BNCG;
  tao->ops->destroy = TaoDestroy_BNCG;

  /* Override default settings (unless already changed) */
  if (!tao->max_it_changed) tao->max_it = 2000;
  if (!tao->max_funcs_changed) tao->max_funcs = 4000;

  /*  Note: nondefault values should be used for nonlinear conjugate gradient  */
  /*  method.  In particular, gtol should be less than 0.5; the value used in  */
  /*  Nocedal and Wright is 0.10.  We use the default values for the  */
  /*  linesearch because it seems to work better. */
  ierr = TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = TaoLineSearchSetType(tao->linesearch, morethuente_type);CHKERRQ(ierr);
  ierr = TaoLineSearchUseTaoRoutines(tao->linesearch, tao);CHKERRQ(ierr);
  ierr = TaoLineSearchSetOptionsPrefix(tao->linesearch,tao->hdr.prefix);CHKERRQ(ierr);

  ierr = PetscNewLog(tao,&cg);CHKERRQ(ierr);
  tao->data = (void*)cg;
  /* The LMVM matrix provides the optional diagonal preconditioner */
  ierr = KSPInitializePackage();CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)tao), &cg->B);CHKERRQ(ierr);
  ierr = PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1);CHKERRQ(ierr);
  ierr = MatSetOptionsPrefix(cg->B, "tao_bncg_");CHKERRQ(ierr);
  ierr = MatSetType(cg->B, MATLMVMDIAGBROYDEN);CHKERRQ(ierr);

  cg->pc = NULL;   /* no user preconditioner until one is set */

  /* Default parameters; see the option descriptions in
     TaoSetFromOptions_BNCG for the meaning of each */
  cg->dk_eta = 0.5;
  cg->hz_eta = 0.4;
  cg->dynamic_restart = PETSC_FALSE;
  cg->unscaled_restart = PETSC_FALSE;
  cg->no_scaling = PETSC_FALSE;
  cg->delta_min = 1e-7;
  cg->delta_max = 100;
  cg->theta = 1.0;
  cg->hz_theta = 1.0;
  cg->dfp_scale = 1.0;
  cg->bfgs_scale = 1.0;
  cg->zeta = 0.1;
  cg->min_quad = 6;
  cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/
  cg->xi = 1.0;
  cg->neg_xi = PETSC_TRUE;
  cg->spaced_restart = PETSC_FALSE;
  cg->tol_quad = 1e-8;
  cg->as_step = 0.001;
  cg->as_tol = 0.001;
  cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); /* Just a little tighter*/
  cg->as_type = CG_AS_BERTSEKAS;
  cg->cg_type = CG_SSML_BFGS;
  cg->alpha = 1.0;
  cg->diag_scaling = PETSC_TRUE;
  PetscFunctionReturn(0);
}
457 
458 
459 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq)
460 {
461    TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
462    PetscErrorCode    ierr;
463    PetscReal         scaling;
464 
465    PetscFunctionBegin;
466    ++cg->resets;
467    scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23);
468    scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling));
469    if (cg->unscaled_restart) {
470      scaling = 1.0;
471      ++cg->pure_gd_steps;
472    }
473    ierr = VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient);CHKERRQ(ierr);
474    /* Also want to reset our diagonal scaling with each restart */
475    if (cg->diag_scaling) {
476      ierr = MatLMVMReset(cg->B, PETSC_FALSE);CHKERRQ(ierr);
477    }
478    PetscFunctionReturn(0);
479  }
480 
/* Decide whether a dynamic (quadratic-model) restart should be triggered.

   Uses a quadratic interpolation test: if the objective decrease matches a
   quadratic model (quadinterp near 1) for cg->min_quad consecutive
   iterations, request a restart. */
PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold)
{
   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
   PetscReal         quadinterp;

   PetscFunctionBegin;
   /* NOTE(review): min_quad is an iteration count (default 6), so
      min_quad/10 is integer division and evaluates to 0 here — this test
      effectively checks cg->f < 0, not "near zero" as the comment says.
      Confirm whether a real tolerance (e.g. tol_quad) was intended. */
   if (cg->f < cg->min_quad/10) {
     *dynrestart = PETSC_FALSE;
     PetscFunctionReturn(0);
   } /* just skip this since this strategy doesn't work well for functions near zero */
   /* Ratio of actual decrease to the quadratic-model prediction */
   quadinterp = 2.0*(cg->f - fold)/(stepsize*(gd + gd_old));
   if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
   else {
     cg->iter_quad = 0;
     *dynrestart = PETSC_FALSE;
   }
   /* Enough consecutive quadratic-like steps: signal a restart */
   if (cg->iter_quad >= cg->min_quad) {
     cg->iter_quad = 0;
     *dynrestart = PETSC_TRUE;
   }
   PetscFunctionReturn(0);
 }
503 
504 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
505 {
506   TAO_BNCG          *cg = (TAO_BNCG*)tao->data;
507   PetscErrorCode    ierr;
508   PetscReal         gamma = 1.0, tau_k, beta;
509   PetscReal         tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk=1.0, gd;
510   PetscReal         gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
511   PetscInt          dim;
512   PetscBool         cg_restart = PETSC_FALSE;
513   PetscFunctionBegin;
514 
515   /* Local curvature check to see if we need to restart */
516   if (tao->niter >= 1 || tao->recycle){
517     ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
518     ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
519     ynorm2 = ynorm*ynorm;
520     ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
521     if (step*dnorm < PETSC_MACHINE_EPSILON || step*dk_yk < PETSC_MACHINE_EPSILON){
522       cg_restart = PETSC_TRUE;
523       ++cg->skipped_updates;
524     }
525     if (cg->spaced_restart) {
526       ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
527       if (tao->niter % (dim*cg->min_restart_num)) cg_restart = PETSC_TRUE;
528     }
529   }
530   /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */
531   if (cg->spaced_restart){
532     ierr = VecGetSize(tao->gradient, &dim);CHKERRQ(ierr);
533     if (0 == tao->niter % (6*dim)) cg_restart = PETSC_TRUE;
534   }
535   /* Compute the diagonal scaling vector if applicable */
536   if (cg->diag_scaling) {
537     ierr = MatLMVMUpdate(cg->B, tao->solution, tao->gradient);CHKERRQ(ierr);
538   }
539 
540   /* A note on diagonal scaling (to be added to paper):
541    For the FR, PR, PRP, and DY methods, the diagonally scaled versions
542    must be derived as a preconditioned CG method rather than as
543    a Hessian initialization like in the Broyden methods. */
544 
545   /* In that case, one writes the objective function as
546    f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
547    Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
548    HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
549    same under preconditioning. Note that A is diagonal, such that A^T = A. */
550 
551   /* This yields questions like what the dot product d_k^T y_k
552    should look like. HZ mistakenly treats that as the same under
553    preconditioning, but that is not necessarily true. */
554 
555   /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
556    we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
557    yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
558    NOT the same if our preconditioning matrix is updated between iterations.
559    This same issue is found when considering dot products of the form g_{k+1}^T y_k. */
560 
561   /* Compute CG step direction */
562   if (cg_restart) {
563     ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
564   } else if (pcgd_fallback) {
565     /* Just like preconditioned CG */
566     ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
567     ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
568   } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
569     switch (cg->cg_type) {
570     case CG_PCGradientDescent:
571       if (!cg->diag_scaling){
572         if (!cg->no_scaling){
573         cg->sts = step*step*dnorm*dnorm;
574         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
575         } else {
576           tau_k = 1.0;
577           ++cg->pure_gd_steps;
578         }
579         ierr = VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient);CHKERRQ(ierr);
580       } else {
581         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
582         ierr = VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work);CHKERRQ(ierr);
583       }
584       break;
585 
586     case CG_HestenesStiefel:
587       /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
588       if (!cg->diag_scaling){
589         cg->sts = step*step*dnorm*dnorm;
590         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
591         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
592         beta = tau_k*gkp1_yk/dk_yk;
593         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
594       } else {
595         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
596         ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
597         beta = gkp1_yk/dk_yk;
598         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
599       }
600       break;
601 
602     case CG_FletcherReeves:
603       ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
604       ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
605       ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
606       ynorm2 = ynorm*ynorm;
607       ierr = VecDot(cg->yk, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
608       if (!cg->diag_scaling){
609         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, step*step*dnorm*dnorm, &tau_k, cg->alpha);CHKERRQ(ierr);
610         beta = tau_k*gnorm2/gnorm2_old;
611         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
612       } else {
613         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Before it's updated */
614         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
615         ierr = VecDot(tao->gradient, cg->g_work, &tmp);CHKERRQ(ierr);
616         beta = tmp/gnorm2_old;
617         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
618       }
619       break;
620 
621     case CG_PolakRibierePolyak:
622       snorm = step*dnorm;
623       if (!cg->diag_scaling){
624         ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
625         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
626         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
627         beta = tau_k*gkp1_yk/gnorm2_old;
628         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
629       } else {
630         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr);
631         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
632         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
633         beta = gkp1_yk/gnorm2_old;
634         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
635       }
636       break;
637 
638     case CG_PolakRibierePlus:
639       ierr = VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient);CHKERRQ(ierr);
640       ierr = VecNorm(cg->yk, NORM_2, &ynorm);CHKERRQ(ierr);
641       ynorm2 = ynorm*ynorm;
642       if (!cg->diag_scaling){
643         ierr = VecDot(cg->G_old, cg->G_old, &gnorm2_old);CHKERRQ(ierr);
644         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
645         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
646         beta = tau_k*gkp1_yk/gnorm2_old;
647         beta = PetscMax(beta, 0.0);
648         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
649       } else {
650         ierr = VecDot(cg->G_old, cg->g_work, &gnorm2_old);CHKERRQ(ierr); /* Old gtDg */
651         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
652         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
653         beta = gkp1_yk/gnorm2_old;
654         beta = PetscMax(beta, 0.0);
655         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
656       }
657       break;
658 
659     case CG_DaiYuan:
660       /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
661          SIAM Journal on optimization 10, no. 1 (1999): 177-182. */
662       if (!cg->diag_scaling){
663         ierr = VecDot(tao->stepdirection, tao->gradient, &gd);CHKERRQ(ierr);
664         ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
665         ierr = TaoBNCGComputeScalarScaling(ynorm2, step*dk_yk, cg->yts, &tau_k, cg->alpha);CHKERRQ(ierr);
666         beta = tau_k*gnorm2/(gd - gd_old);
667         ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
668       } else {
669         ierr = MatMult(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
670         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
671         ierr = VecDot(cg->g_work, tao->gradient, &gtDg);CHKERRQ(ierr);
672         ierr = VecDot(tao->stepdirection, cg->G_old, &gd_old);CHKERRQ(ierr);
673         ierr = VecDot(cg->d_work, cg->g_work, &dk_yk);CHKERRQ(ierr);
674         dk_yk = dk_yk - gd_old;
675         beta = gtDg/dk_yk;
676         ierr = VecScale(cg->d_work, beta);CHKERRQ(ierr);
677         ierr = VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work);CHKERRQ(ierr);
678       }
679       break;
680 
681     case CG_HagerZhang:
682       /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
683          ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
684       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
685       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
686       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
687       snorm = dnorm*step;
688       cg->yts = step*dk_yk;
689       if (cg->use_dynamic_restart){
690         ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
691       }
692       if (cg->dynamic_restart){
693         ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
694       } else {
695         if (!cg->diag_scaling){
696           ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
697           ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
698           /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */
699           tmp = gd/dk_yk;
700           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk));
701           /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
702           beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
703           /* d <- -t*g + beta*t*d */
704           ierr = VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient);CHKERRQ(ierr);
705         } else {
706           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
707           cg->yty = ynorm2;
708           cg->sts = snorm*snorm;
709           /* Apply the diagonal scaling to all my vectors */
710           ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
711           ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
712           ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
713           /* Construct the constant ytDgkp1 */
714           ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
715           /* Construct the constant for scaling Dkyk in the update */
716           tmp = gd/dk_yk;
717           ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
718           tau_k = -tau_k*gd/(dk_yk*dk_yk);
719           /* beta is the constant which adds the dk contribution */
720           beta = gkp1_yk/dk_yk + cg->hz_theta*tau_k; /* HZ; (1.15) from DK 2013 */
721           /* From HZ2013, modified to account for diagonal scaling*/
722           ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
723           ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
724           beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
725           /* Do the update */
726           ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
727         }
728       }
729       break;
730 
731     case CG_DaiKou:
732       /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
733          SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
734       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
735       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
736       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
737       snorm = step*dnorm;
738       cg->yts = dk_yk*step;
739       if (!cg->diag_scaling){
740         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
741         ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
742         /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
743         tmp = gd/dk_yk;
744         beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk) + gd/(dnorm*dnorm)) - step*gd/dk_yk;
745         beta = PetscMax(PetscMax(beta, cg->hz_eta*tau_k*gd_old/(dnorm*dnorm)), cg->dk_eta*tau_k*gd/(dnorm*dnorm));
746         /* d <- -t*g + beta*t*d */
747         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
748       } else {
749         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
750         cg->yty = ynorm2;
751         cg->sts = snorm*snorm;
752         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
753         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
754         ierr = MatSolve(cg->B, tao->stepdirection, cg->d_work);CHKERRQ(ierr);
755         /* Construct the constant ytDgkp1 */
756         ierr = VecDot(cg->yk, cg->g_work, &gkp1_yk);CHKERRQ(ierr);
757         ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
758         tau_k = tau_k*gd/(dk_yk*dk_yk);
759         tmp = gd/dk_yk;
760         /* beta is the constant which adds the dk contribution */
761         beta = gkp1_yk/dk_yk - step*tmp - tau_k;
762         /* Update this for the last term in beta */
763         ierr = VecDot(cg->y_work, tao->stepdirection, &dk_yk);CHKERRQ(ierr);
764         beta += tmp*dk_yk/(dnorm*dnorm); /* projection of y_work onto dk */
765         ierr = VecDot(tao->stepdirection, cg->g_work, &gd);CHKERRQ(ierr);
766         ierr = VecDot(cg->G_old, cg->d_work, &gd_old);CHKERRQ(ierr);
767         beta = PetscMax(PetscMax(beta, cg->hz_eta*gd_old/(dnorm*dnorm)), cg->dk_eta*gd/(dnorm*dnorm));
768         /* Do the update */
769         ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
770       }
771       break;
772 
773     case CG_KouDai:
774       /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
775          Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
776       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
777       ierr = VecDot(cg->G_old, tao->stepdirection, &gd_old);CHKERRQ(ierr);
778       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
779       snorm = step*dnorm;
780       cg->yts = dk_yk*step;
781       if (cg->use_dynamic_restart){
782         ierr = TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold);CHKERRQ(ierr);
783       }
784       if (cg->dynamic_restart){
785         ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
786       } else {
787         if (!cg->diag_scaling){
788           ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
789           ierr = TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm*snorm, &tau_k, cg->alpha);CHKERRQ(ierr);
790           beta = tau_k*(gkp1_yk/dk_yk - ynorm2*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
791           if (beta < cg->zeta*tau_k*gd/(dnorm*dnorm)) /* 0.1 is KD's zeta parameter */
792           {
793             beta = cg->zeta*tau_k*gd/(dnorm*dnorm);
794             gamma = 0.0;
795           } else {
796             if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0*gd/dk_yk;
797             /* This seems to be very effective when there's no tau_k scaling.
798                This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */
799             else {
800               gamma = cg->xi*gd/dk_yk;
801             }
802           }
803           /* d <- -t*g + beta*t*d + t*tmp*yk */
804           ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
805         } else {
806           /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
807           cg->yty = ynorm2;
808           cg->sts = snorm*snorm;
809           ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
810           ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
811           /* Construct the constant ytDgkp1 */
812           ierr = VecDot(cg->yk, cg->g_work, &gkp1D_yk);CHKERRQ(ierr);
813           /* Construct the constant for scaling Dkyk in the update */
814           gamma = gd/dk_yk;
815           /* tau_k = -ytDy/(ytd)^2 * gd */
816           ierr = VecDot(cg->yk, cg->y_work, &tau_k);CHKERRQ(ierr);
817           tau_k = tau_k*gd/(dk_yk*dk_yk);
818           /* beta is the constant which adds the d_k contribution */
819           beta = gkp1D_yk/dk_yk - step*gamma - tau_k;
820           /* Here is the requisite check */
821           ierr = VecDot(tao->stepdirection, cg->g_work, &tmp);CHKERRQ(ierr);
822           if (cg->neg_xi){
823             /* modified KD implementation */
824             if (gkp1D_yk/dk_yk < 0) gamma = -1.0*gd/dk_yk;
825             else {
826               gamma = cg->xi*gd/dk_yk;
827             }
828             if (beta < cg->zeta*tmp/(dnorm*dnorm)){
829               beta = cg->zeta*tmp/(dnorm*dnorm);
830               gamma = 0.0;
831             }
832           } else { /* original KD 2015 implementation */
833             if (beta < cg->zeta*tmp/(dnorm*dnorm)) {
834               beta = cg->zeta*tmp/(dnorm*dnorm);
835               gamma = 0.0;
836             } else {
837               gamma = cg->xi*gd/dk_yk;
838             }
839           }
840           /* Do the update in two steps */
841           ierr = VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work);CHKERRQ(ierr);
842           ierr = VecAXPY(tao->stepdirection, gamma, cg->y_work);CHKERRQ(ierr);
843         }
844       }
845       break;
846 
847     case CG_SSML_BFGS:
848       /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
849          Discussion Papers 269 (1977). */
850       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
851       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
852       snorm = step*dnorm;
853       cg->yts = dk_yk*step;
854       cg->yty = ynorm2;
855       cg->sts = snorm*snorm;
856       if (!cg->diag_scaling){
857         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
858         ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
859         tmp = gd/dk_yk;
860         beta = tau_k*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*tmp;
861         /* d <- -t*g + beta*t*d + t*tmp*yk */
862         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp*tau_k, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
863       } else {
864         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
865         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
866         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
867         /* compute scalar gamma */
868         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
869         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
870         gamma = gd/dk_yk;
871         /* Compute scalar beta */
872         beta = (gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
873         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
874         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
875       }
876       break;
877 
878     case CG_SSML_DFP:
879       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
880       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
881       snorm = step*dnorm;
882       cg->yts = dk_yk*step;
883       cg->yty = ynorm2;
884       cg->sts = snorm*snorm;
885       if (!cg->diag_scaling){
886         /* Instead of a regular convex combination, we will solve a quadratic formula. */
887         ierr = TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha);CHKERRQ(ierr);
888         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
889         tau_k = cg->dfp_scale*tau_k;
890         tmp = tau_k*gkp1_yk/cg->yty;
891         beta = -step*gd/dk_yk;
892         /* d <- -t*g + beta*d + tmp*yk */
893         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
894       } else {
895         /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
896         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
897         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
898         /* compute scalar gamma */
899         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
900         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
901         gamma = (gkp1_yk/tmp);
902         /* Compute scalar beta */
903         beta = -step*gd/dk_yk;
904         /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
905         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
906       }
907       break;
908 
909     case CG_SSML_BROYDEN:
910       ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
911       ierr = VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution);CHKERRQ(ierr);
912       snorm = step*dnorm;
913       cg->yts = step*dk_yk;
914       cg->yty = ynorm2;
915       cg->sts = snorm*snorm;
916       if (!cg->diag_scaling){
917         /* Instead of a regular convex combination, we will solve a quadratic formula. */
918         ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_bfgs, cg->bfgs_scale);CHKERRQ(ierr);
919         ierr = TaoBNCGComputeScalarScaling(cg->yty, step*dk_yk, snorm*snorm, &tau_dfp, cg->dfp_scale);CHKERRQ(ierr);
920         ierr = VecDot(cg->yk, tao->gradient, &gkp1_yk);CHKERRQ(ierr);
921         tau_k = cg->theta*tau_bfgs + (1.0-cg->theta)*tau_dfp;
922         /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0,
923            it should reproduce the tau_dfp scaling. Same with dfp_scale.   */
924         tmp = cg->theta*tau_bfgs*gd/dk_yk + (1-cg->theta)*tau_dfp*gkp1_yk/cg->yty;
925         beta = cg->theta*tau_bfgs*(gkp1_yk/dk_yk - cg->yty*gd/(dk_yk*dk_yk)) - step*gd/dk_yk;
926         /* d <- -t*g + beta*d + tmp*yk */
927         ierr = VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk);CHKERRQ(ierr);
928       } else {
929         /* We have diagonal scaling enabled */
930         ierr = MatSolve(cg->B, tao->gradient, cg->g_work);CHKERRQ(ierr);
931         ierr = MatSolve(cg->B, cg->yk, cg->y_work);CHKERRQ(ierr);
932         /* compute scalar gamma */
933         ierr = VecDot(cg->g_work, cg->yk, &gkp1_yk);CHKERRQ(ierr);
934         ierr = VecDot(cg->y_work, cg->yk, &tmp);CHKERRQ(ierr);
935         gamma = cg->theta*gd/dk_yk + (1-cg->theta)*(gkp1_yk/tmp);
936         /* Compute scalar beta */
937         beta = cg->theta*(gkp1_yk/dk_yk - gd*tmp/(dk_yk*dk_yk)) - step*gd/dk_yk;
938         /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
939         ierr = VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work);CHKERRQ(ierr);
940       }
941       break;
942 
943     default:
944       break;
945 
946     }
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
952 {
953   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
954   PetscErrorCode               ierr;
955   TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
956   PetscReal                    step=1.0,gnorm2,gd,dnorm=0.0;
957   PetscReal                    gnorm2_old,f_old,resnorm, gnorm_old;
958   PetscBool                    pcgd_fallback = PETSC_FALSE;
959 
960   PetscFunctionBegin;
961   /* We are now going to perform a line search along the direction. */
962   /* Store solution and gradient info before it changes */
963   ierr = VecCopy(tao->solution, cg->X_old);CHKERRQ(ierr);
964   ierr = VecCopy(tao->gradient, cg->G_old);CHKERRQ(ierr);
965   ierr = VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old);CHKERRQ(ierr);
966 
967   gnorm_old = gnorm;
968   gnorm2_old = gnorm_old*gnorm_old;
969   f_old = cg->f;
970   /* Perform bounded line search. If we are recycling a solution from a previous */
971   /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */
972   if (!(tao->recycle && 0 == tao->niter)){
973     /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */
974     ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
975     ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
976     ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
977 
978     /*  Check linesearch failure */
979     if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
980       ++cg->ls_fails;
981       if (cg->cg_type == CG_GradientDescent){
982         /* Nothing left to do but fail out of the optimization */
983         step = 0.0;
984         tao->reason = TAO_DIVERGED_LS_FAILURE;
985       } else {
986         /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */
987         ierr = VecCopy(cg->X_old, tao->solution);CHKERRQ(ierr);
988         ierr = VecCopy(cg->G_old, tao->gradient);CHKERRQ(ierr);
989         ierr = VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient);CHKERRQ(ierr);
990         gnorm = gnorm_old;
991         gnorm2 = gnorm2_old;
992         cg->f = f_old;
993 
994         /* Fall back on preconditioned CG (so long as you're not already using it) */
995         if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling){
996           pcgd_fallback = PETSC_TRUE;
997           ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
998 
999           ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1000           ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1001 
1002           ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
1003           ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
1004           ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
1005 
1006           pcgd_fallback = PETSC_FALSE;
1007           if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1008             /* Going to perform a regular gradient descent step. */
1009             ++cg->ls_fails;
1010             step = 0.0;
1011           }
1012         }
1013         /* Fall back on the scaled gradient step */
1014         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1015           ++cg->ls_fails;
1016           ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1017           ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1018           ierr = TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0);CHKERRQ(ierr);
1019           ierr = TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status);CHKERRQ(ierr);
1020           ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);
1021         }
1022 
1023         if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER){
1024           /* Nothing left to do but fail out of the optimization */
1025           ++cg->ls_fails;
1026           step = 0.0;
1027           tao->reason = TAO_DIVERGED_LS_FAILURE;
1028         } else {
1029           /* One of the fallbacks worked. Set them both back equal to false. */
1030           pcgd_fallback = PETSC_FALSE;
1031         }
1032       }
1033     }
1034     /* Convergence test for line search failure */
1035     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1036 
1037     /* Standard convergence test */
1038     ierr = VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W);CHKERRQ(ierr);
1039     ierr = VecNorm(cg->W, NORM_2, &resnorm);CHKERRQ(ierr);
1040     if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PetscObjectComm((PetscObject)tao),PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
1041     ierr = TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
1042     ierr = TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step);CHKERRQ(ierr);
1043     ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
1044     if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);
1045   }
1046   /* Assert we have an updated step and we need at least one more iteration. */
1047   /* Calculate the next direction */
1048   /* Estimate the active set at the new solution */
1049   ierr = TaoBNCGEstimateActiveSet(tao, cg->as_type);CHKERRQ(ierr);
1050   /* Compute the projected gradient and its norm */
1051   ierr = VecCopy(cg->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
1052   ierr = VecISSet(tao->gradient, cg->active_idx, 0.0);CHKERRQ(ierr);
1053   ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
1054   gnorm2 = gnorm*gnorm;
1055 
1056   /* Calculate some quantities used in the StepDirectionUpdate. */
1057   ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
1058   /* Update the step direction. */
1059   ierr = TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback);CHKERRQ(ierr);
1060   ++tao->niter;
1061   ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1062 
1063   if (cg->cg_type != CG_GradientDescent) {
1064     /* Figure out which previously active variables became inactive this iteration */
1065     ierr = ISDestroy(&cg->new_inactives);CHKERRQ(ierr);
1066     if (cg->inactive_idx && cg->inactive_old) {
1067       ierr = ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives);CHKERRQ(ierr);
1068     }
1069     /* Selectively reset the CG step those freshly inactive variables */
1070     if (cg->new_inactives) {
1071       ierr = VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
1072       ierr = VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
1073       ierr = VecCopy(cg->inactive_grad, cg->inactive_step);CHKERRQ(ierr);
1074       ierr = VecScale(cg->inactive_step, -1.0);CHKERRQ(ierr);
1075       ierr = VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step);CHKERRQ(ierr);
1076       ierr = VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad);CHKERRQ(ierr);
1077     }
1078     /* Verify that this is a descent direction */
1079     ierr = VecDot(tao->gradient, tao->stepdirection, &gd);CHKERRQ(ierr);
1080     ierr = VecNorm(tao->stepdirection, NORM_2, &dnorm);CHKERRQ(ierr);
1081     if (PetscIsInfOrNanReal(gd) || (gd/(dnorm*dnorm) <= -1e10 || gd/(dnorm*dnorm) >= -1e-10)) {
1082       /* Not a descent direction, so we reset back to projected gradient descent */
1083       ierr = TaoBNCGResetUpdate(tao, gnorm2);CHKERRQ(ierr);
1084       ierr = TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection);CHKERRQ(ierr);
1085       ++cg->descent_error;
1086     } else {
1087     }
1088   }
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
1093 {
1094   TAO_BNCG                     *cg = (TAO_BNCG*)tao->data;
1095   PetscErrorCode               ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectReference((PetscObject)H0);CHKERRQ(ierr);
1099   cg->pc = H0;
1100   PetscFunctionReturn(0);
1101 }
1102