1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/ 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_PCGradientDescent 12 18 #define CGTypes 13 19 20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"}; 21 22 #define CG_AS_NONE 0 23 #define CG_AS_BERTSEKAS 1 24 #define CG_AS_SIZE 2 25 26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) { 29 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 30 31 PetscFunctionBegin; 32 PetscCall(ISDestroy(&cg->inactive_old)); 33 if (cg->inactive_idx) { 34 PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old)); 35 PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old)); 36 } 37 switch (asType) { 38 case CG_AS_NONE: 39 PetscCall(ISDestroy(&cg->inactive_idx)); 40 PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx)); 41 PetscCall(ISDestroy(&cg->active_idx)); 42 PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx)); 43 break; 44 45 case CG_AS_BERTSEKAS: 46 /* Use gradient descent to estimate the active set */ 47 PetscCall(VecCopy(cg->unprojected_gradient, cg->W)); 48 PetscCall(VecScale(cg->W, -1.0)); 49 PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx)); 50 break; 51 default: break; 52 } 53 PetscFunctionReturn(0); 54 } 55 56 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) { 57 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 58 59 PetscFunctionBegin; 60 switch (asType) { 61 case CG_AS_NONE: PetscCall(VecISSet(step, cg->active_idx, 0.0)); break; 62 63 case CG_AS_BERTSEKAS: PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step)); break; 64 65 default: break; 66 } 67 PetscFunctionReturn(0); 68 } 69 70 static PetscErrorCode TaoSolve_BNCG(Tao tao) { 71 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 72 PetscReal step = 1.0, gnorm, gnorm2, resnorm; 73 PetscInt nDiff; 74 75 PetscFunctionBegin; 76 /* Project the current point onto the feasible set */ 77 PetscCall(TaoComputeVariableBounds(tao)); 78 PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU)); 79 80 /* Project the initial point onto the feasible region */ 81 PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution)); 82 83 if (nDiff > 0 || !tao->recycle) { PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); } 84 PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm)); 85 PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 86 87 /* Estimate the active set and compute the projected gradient */ 88 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 89 90 /* Project the gradient and calculate the norm */ 91 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 92 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 93 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 94 gnorm2 = gnorm * gnorm; 95 96 /* Initialize counters */ 97 tao->niter = 0; 98 cg->ls_fails = cg->descent_error = 0; 99 cg->resets = -1; 100 cg->skipped_updates = cg->pure_gd_steps = 0; 101 cg->iter_quad = 0; 102 103 /* Convergence test at the starting point. */ 104 tao->reason = TAO_CONTINUE_ITERATING; 105 106 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 107 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 108 PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 109 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 110 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 111 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 112 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 113 /* Calculate initial direction. */ 114 if (!tao->recycle) { 115 /* We are not recycling a solution/history from a past TaoSolve */ 116 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 117 } 118 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 119 while (1) { 120 /* Call general purpose update function */ 121 if (tao->ops->update) { 122 PetscUseTypeMethod(tao, update, tao->niter, tao->user_update); 123 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 124 } 125 PetscCall(TaoBNCGConductIteration(tao, gnorm)); 126 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 127 } 128 } 129 130 static PetscErrorCode TaoSetUp_BNCG(Tao tao) { 131 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 132 133 PetscFunctionBegin; 134 if (!tao->gradient) { PetscCall(VecDuplicate(tao->solution, &tao->gradient)); } 135 if (!tao->stepdirection) { PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); } 136 if (!cg->W) { PetscCall(VecDuplicate(tao->solution, &cg->W)); } 137 if (!cg->work) { PetscCall(VecDuplicate(tao->solution, &cg->work)); } 138 if (!cg->sk) { PetscCall(VecDuplicate(tao->solution, &cg->sk)); } 139 if (!cg->yk) { PetscCall(VecDuplicate(tao->gradient, &cg->yk)); } 140 if (!cg->X_old) { PetscCall(VecDuplicate(tao->solution, &cg->X_old)); } 141 if (!cg->G_old) { PetscCall(VecDuplicate(tao->gradient, &cg->G_old)); } 142 if (cg->diag_scaling) { 143 PetscCall(VecDuplicate(tao->solution, &cg->d_work)); 144 PetscCall(VecDuplicate(tao->solution, &cg->y_work)); 145 PetscCall(VecDuplicate(tao->solution, &cg->g_work)); 146 } 147 if (!cg->unprojected_gradient) { PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient)); } 148 if (!cg->unprojected_gradient_old) { PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old)); } 149 PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk)); 150 if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc)); 151 PetscFunctionReturn(0); 152 } 153 154 static PetscErrorCode TaoDestroy_BNCG(Tao tao) { 155 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 156 157 PetscFunctionBegin; 158 if (tao->setupcalled) { 159 PetscCall(VecDestroy(&cg->W)); 160 PetscCall(VecDestroy(&cg->work)); 161 PetscCall(VecDestroy(&cg->X_old)); 162 PetscCall(VecDestroy(&cg->G_old)); 163 PetscCall(VecDestroy(&cg->unprojected_gradient)); 164 PetscCall(VecDestroy(&cg->unprojected_gradient_old)); 165 PetscCall(VecDestroy(&cg->g_work)); 166 PetscCall(VecDestroy(&cg->d_work)); 167 PetscCall(VecDestroy(&cg->y_work)); 168 PetscCall(VecDestroy(&cg->sk)); 169 PetscCall(VecDestroy(&cg->yk)); 170 } 171 PetscCall(ISDestroy(&cg->active_lower)); 172 PetscCall(ISDestroy(&cg->active_upper)); 173 PetscCall(ISDestroy(&cg->active_fixed)); 174 PetscCall(ISDestroy(&cg->active_idx)); 175 PetscCall(ISDestroy(&cg->inactive_idx)); 176 PetscCall(ISDestroy(&cg->inactive_old)); 177 PetscCall(ISDestroy(&cg->new_inactives)); 178 PetscCall(MatDestroy(&cg->B)); 179 if (cg->pc) { PetscCall(MatDestroy(&cg->pc)); } 180 PetscCall(PetscFree(tao->data)); 181 PetscFunctionReturn(0); 182 } 183 184 static PetscErrorCode TaoSetFromOptions_BNCG(Tao tao, PetscOptionItems *PetscOptionsObject) { 185 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 186 187 PetscFunctionBegin; 188 PetscOptionsHeadBegin(PetscOptionsObject, "Nonlinear Conjugate Gradient method for unconstrained optimization"); 189 PetscCall(PetscOptionsEList("-tao_bncg_type", "cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type, NULL)); 190 if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 191 if (CG_GradientDescent == cg->cg_type) { 192 cg->cg_type = CG_PCGradientDescent; 193 /* Set scaling equal to none or, at best, scalar scaling. */ 194 cg->unscaled_restart = PETSC_TRUE; 195 cg->diag_scaling = PETSC_FALSE; 196 } 197 PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type, NULL)); 198 PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL)); 199 PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL)); 200 PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL)); 201 PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) Parameter in the KD method", "", cg->xi, &cg->xi, NULL)); 202 PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL)); 203 PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL)); 204 PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL)); 205 PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL)); 206 PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL)); 207 PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL)); 208 PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL)); 209 PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL)); 210 PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL)); 211 PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL)); 212 PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL)); 213 PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) Enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL)); 214 PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL)); 215 if (cg->no_scaling) { 216 cg->diag_scaling = PETSC_FALSE; 217 cg->alpha = -1.0; 218 } 219 if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */ 220 cg->neg_xi = PETSC_TRUE; 221 } 222 PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) Use negative xi when it might be a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL)); 223 PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL)); 224 PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL)); 225 PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL)); 226 PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL)); 227 228 PetscOptionsHeadEnd(); 229 PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix)); 230 PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_")); 231 PetscCall(MatSetFromOptions(cg->B)); 232 PetscFunctionReturn(0); 233 } 234 235 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) { 236 PetscBool isascii; 237 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 238 239 PetscFunctionBegin; 240 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 241 if (isascii) { 242 PetscCall(PetscViewerASCIIPushTab(viewer)); 243 PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type])); 244 PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates)); 245 PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets)); 246 PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps)); 247 PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error)); 248 PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails)); 249 if (cg->diag_scaling) { 250 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 251 if (isascii) { 252 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 253 PetscCall(MatView(cg->B, viewer)); 254 PetscCall(PetscViewerPopFormat(viewer)); 255 } 256 } 257 PetscCall(PetscViewerASCIIPopTab(viewer)); 258 } 259 PetscFunctionReturn(0); 260 } 261 262 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) { 263 PetscReal a, b, c, sig1, sig2; 264 265 PetscFunctionBegin; 266 *scale = 0.0; 267 if (1.0 == alpha) *scale = yts / yty; 268 else if (0.0 == alpha) *scale = sts / yts; 269 else if (-1.0 == alpha) *scale = 1.0; 270 else { 271 a = yty; 272 b = yts; 273 c = sts; 274 a *= alpha; 275 b *= -(2.0 * alpha - 1.0); 276 c *= alpha - 1.0; 277 sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a); 278 sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a); 279 /* accept the positive root as the scalar */ 280 if (sig1 > 0.0) *scale = sig1; 281 else if (sig2 > 0.0) *scale = sig2; 282 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 283 } 284 PetscFunctionReturn(0); 285 } 286 287 /*MC 288 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 289 290 Options Database Keys: 291 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 292 . -tao_bncg_eta <r> - restart tolerance 293 . -tao_bncg_type <taocg_type> - cg formula 294 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 295 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 296 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 297 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 298 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 299 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 300 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 301 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 302 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 303 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 304 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 305 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 306 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 307 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 308 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 309 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 310 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 311 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 312 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 313 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 314 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 315 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 316 317 Notes: 318 CG formulas are: 319 + "gd" - Gradient Descent 320 . "fr" - Fletcher-Reeves 321 . "pr" - Polak-Ribiere-Polyak 322 . "prp" - Polak-Ribiere-Plus 323 . "hs" - Hestenes-Steifel 324 . "dy" - Dai-Yuan 325 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 326 . "ssml_dfp" - Self-Scaling Memoryless DFP 327 . "ssml_brdn" - Self-Scaling Memoryless Broyden 328 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 329 . "dk" - Dai-Kou (2013) 330 - "kd" - Kou-Dai (2015) 331 332 Level: beginner 333 M*/ 334 335 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) { 336 TAO_BNCG *cg; 337 const char *morethuente_type = TAOLINESEARCHMT; 338 339 PetscFunctionBegin; 340 tao->ops->setup = TaoSetUp_BNCG; 341 tao->ops->solve = TaoSolve_BNCG; 342 tao->ops->view = TaoView_BNCG; 343 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 344 tao->ops->destroy = TaoDestroy_BNCG; 345 346 /* Override default settings (unless already changed) */ 347 if (!tao->max_it_changed) tao->max_it = 2000; 348 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 349 350 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 351 /* method. In particular, gtol should be less that 0.5; the value used in */ 352 /* Nocedal and Wright is 0.10. We use the default values for the */ 353 /* linesearch because it seems to work better. */ 354 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 355 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 356 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 357 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 358 359 PetscCall(PetscNewLog(tao, &cg)); 360 tao->data = (void *)cg; 361 PetscCall(KSPInitializePackage()); 362 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 363 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 364 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 365 366 cg->pc = NULL; 367 368 cg->dk_eta = 0.5; 369 cg->hz_eta = 0.4; 370 cg->dynamic_restart = PETSC_FALSE; 371 cg->unscaled_restart = PETSC_FALSE; 372 cg->no_scaling = PETSC_FALSE; 373 cg->delta_min = 1e-7; 374 cg->delta_max = 100; 375 cg->theta = 1.0; 376 cg->hz_theta = 1.0; 377 cg->dfp_scale = 1.0; 378 cg->bfgs_scale = 1.0; 379 cg->zeta = 0.1; 380 cg->min_quad = 6; 381 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 382 cg->xi = 1.0; 383 cg->neg_xi = PETSC_TRUE; 384 cg->spaced_restart = PETSC_FALSE; 385 cg->tol_quad = 1e-8; 386 cg->as_step = 0.001; 387 cg->as_tol = 0.001; 388 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0 / 3.0); /* Just a little tighter*/ 389 cg->as_type = CG_AS_BERTSEKAS; 390 cg->cg_type = CG_SSML_BFGS; 391 cg->alpha = 1.0; 392 cg->diag_scaling = PETSC_TRUE; 393 PetscFunctionReturn(0); 394 } 395 396 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) { 397 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 398 PetscReal scaling; 399 400 PetscFunctionBegin; 401 ++cg->resets; 402 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 403 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 404 if (cg->unscaled_restart) { 405 scaling = 1.0; 406 ++cg->pure_gd_steps; 407 } 408 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 409 /* Also want to reset our diagonal scaling with each restart */ 410 if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 411 PetscFunctionReturn(0); 412 } 413 414 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) { 415 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 416 PetscReal quadinterp; 417 418 PetscFunctionBegin; 419 if (cg->f < cg->min_quad / 10) { 420 *dynrestart = PETSC_FALSE; 421 PetscFunctionReturn(0); 422 } /* just skip this since this strategy doesn't work well for functions near zero */ 423 quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old)); 424 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 425 else { 426 cg->iter_quad = 0; 427 *dynrestart = PETSC_FALSE; 428 } 429 if (cg->iter_quad >= cg->min_quad) { 430 cg->iter_quad = 0; 431 *dynrestart = PETSC_TRUE; 432 } 433 PetscFunctionReturn(0); 434 } 435 436 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) { 437 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 438 PetscReal gamma = 1.0, tau_k, beta; 439 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd; 440 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 441 PetscInt dim; 442 PetscBool cg_restart = PETSC_FALSE; 443 PetscFunctionBegin; 444 445 /* Local curvature check to see if we need to restart */ 446 if (tao->niter >= 1 || tao->recycle) { 447 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 448 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 449 ynorm2 = ynorm * ynorm; 450 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 451 if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) { 452 cg_restart = PETSC_TRUE; 453 ++cg->skipped_updates; 454 } 455 if (cg->spaced_restart) { 456 PetscCall(VecGetSize(tao->gradient, &dim)); 457 if (tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE; 458 } 459 } 460 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 461 if (cg->spaced_restart) { 462 PetscCall(VecGetSize(tao->gradient, &dim)); 463 if (0 == tao->niter % (6 * dim)) cg_restart = PETSC_TRUE; 464 } 465 /* Compute the diagonal scaling vector if applicable */ 466 if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient)); 467 468 /* A note on diagonal scaling (to be added to paper): 469 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 470 must be derived as a preconditioned CG method rather than as 471 a Hessian initialization like in the Broyden methods. */ 472 473 /* In that case, one writes the objective function as 474 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 475 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 476 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 477 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 478 479 /* This yields questions like what the dot product d_k^T y_k 480 should look like. HZ mistakenly treats that as the same under 481 preconditioning, but that is not necessarily true. */ 482 483 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 484 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 485 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 486 NOT the same if our preconditioning matrix is updated between iterations. 487 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 488 489 /* Compute CG step direction */ 490 if (cg_restart) { 491 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 492 } else if (pcgd_fallback) { 493 /* Just like preconditioned CG */ 494 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 495 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 496 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 497 switch (cg->cg_type) { 498 case CG_PCGradientDescent: 499 if (!cg->diag_scaling) { 500 if (!cg->no_scaling) { 501 cg->sts = step * step * dnorm * dnorm; 502 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha)); 503 } else { 504 tau_k = 1.0; 505 ++cg->pure_gd_steps; 506 } 507 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient)); 508 } else { 509 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 510 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 511 } 512 break; 513 514 case CG_HestenesStiefel: 515 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 516 if (!cg->diag_scaling) { 517 cg->sts = step * step * dnorm * dnorm; 518 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 519 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha)); 520 beta = tau_k * gkp1_yk / dk_yk; 521 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 522 } else { 523 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 524 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 525 beta = gkp1_yk / dk_yk; 526 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 527 } 528 break; 529 530 case CG_FletcherReeves: 531 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 532 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 533 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 534 ynorm2 = ynorm * ynorm; 535 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 536 if (!cg->diag_scaling) { 537 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha)); 538 beta = tau_k * gnorm2 / gnorm2_old; 539 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 540 } else { 541 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */ 542 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 543 PetscCall(VecDot(tao->gradient, cg->g_work, &tmp)); 544 beta = tmp / gnorm2_old; 545 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 546 } 547 break; 548 549 case CG_PolakRibierePolyak: 550 snorm = step * dnorm; 551 if (!cg->diag_scaling) { 552 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 553 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 554 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha)); 555 beta = tau_k * gkp1_yk / gnorm2_old; 556 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 557 } else { 558 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); 559 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 560 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 561 beta = gkp1_yk / gnorm2_old; 562 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 563 } 564 break; 565 566 case CG_PolakRibierePlus: 567 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 568 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 569 ynorm2 = ynorm * ynorm; 570 if (!cg->diag_scaling) { 571 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 572 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 573 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha)); 574 beta = tau_k * gkp1_yk / gnorm2_old; 575 beta = PetscMax(beta, 0.0); 576 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 577 } else { 578 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */ 579 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 580 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 581 beta = gkp1_yk / gnorm2_old; 582 beta = PetscMax(beta, 0.0); 583 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 584 } 585 break; 586 587 case CG_DaiYuan: 588 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 589 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 590 if (!cg->diag_scaling) { 591 PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd)); 592 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 593 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha)); 594 beta = tau_k * gnorm2 / (gd - gd_old); 595 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 596 } else { 597 PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work)); 598 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 599 PetscCall(VecDot(cg->g_work, tao->gradient, >Dg)); 600 PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old)); 601 PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk)); 602 dk_yk = dk_yk - gd_old; 603 beta = gtDg / dk_yk; 604 PetscCall(VecScale(cg->d_work, beta)); 605 PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work)); 606 } 607 break; 608 609 case CG_HagerZhang: 610 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 611 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 612 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 613 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 614 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 615 snorm = dnorm * step; 616 cg->yts = step * dk_yk; 617 if (cg->use_dynamic_restart) { PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); } 618 if (cg->dynamic_restart) { 619 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 620 } else { 621 if (!cg->diag_scaling) { 622 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 623 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 624 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 625 tmp = gd / dk_yk; 626 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)); 627 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 628 beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm)); 629 /* d <- -t*g + beta*t*d */ 630 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 631 } else { 632 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 633 cg->yty = ynorm2; 634 cg->sts = snorm * snorm; 635 /* Apply the diagonal scaling to all my vectors */ 636 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 637 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 638 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 639 /* Construct the constant ytDgkp1 */ 640 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 641 /* Construct the constant for scaling Dkyk in the update */ 642 tmp = gd / dk_yk; 643 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 644 tau_k = -tau_k * gd / (dk_yk * dk_yk); 645 /* beta is the constant which adds the dk contribution */ 646 beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */ 647 /* From HZ2013, modified to account for diagonal scaling*/ 648 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 649 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 650 beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm)); 651 /* Do the update */ 652 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 653 } 654 } 655 break; 656 657 case CG_DaiKou: 658 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 659 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 660 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 661 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 662 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 663 snorm = step * dnorm; 664 cg->yts = dk_yk * step; 665 if (!cg->diag_scaling) { 666 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 667 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 668 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 669 tmp = gd / dk_yk; 670 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk; 671 beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm)); 672 /* d <- -t*g + beta*t*d */ 673 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk)); 674 } else { 675 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 676 cg->yty = ynorm2; 677 cg->sts = snorm * snorm; 678 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 679 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 680 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 681 /* Construct the constant ytDgkp1 */ 682 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 683 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 684 tau_k = tau_k * gd / (dk_yk * dk_yk); 685 tmp = gd / dk_yk; 686 /* beta is the constant which adds the dk contribution */ 687 beta = gkp1_yk / dk_yk - step * tmp - tau_k; 688 /* Update this for the last term in beta */ 689 PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk)); 690 beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */ 691 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 692 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 693 beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm)); 694 /* Do the update */ 695 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 696 } 697 break; 698 699 case CG_KouDai: 700 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 701 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 702 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 703 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 704 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 705 snorm = step * dnorm; 706 cg->yts = dk_yk * step; 707 if (cg->use_dynamic_restart) { PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); } 708 if (cg->dynamic_restart) { 709 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 710 } else { 711 if (!cg->diag_scaling) { 712 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 713 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 714 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk; 715 if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) /* 0.1 is KD's zeta parameter */ 716 { 717 beta = cg->zeta * tau_k * gd / (dnorm * dnorm); 718 gamma = 0.0; 719 } else { 720 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk; 721 /* This seems to be very effective when there's no tau_k scaling. 722 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 723 else { gamma = cg->xi * gd / dk_yk; } 724 } 725 /* d <- -t*g + beta*t*d + t*tmp*yk */ 726 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk)); 727 } else { 728 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 729 cg->yty = ynorm2; 730 cg->sts = snorm * snorm; 731 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 732 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 733 /* Construct the constant ytDgkp1 */ 734 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk)); 735 /* Construct the constant for scaling Dkyk in the update */ 736 gamma = gd / dk_yk; 737 /* tau_k = -ytDy/(ytd)^2 * gd */ 738 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 739 tau_k = tau_k * gd / (dk_yk * dk_yk); 740 /* beta is the constant which adds the d_k contribution */ 741 beta = gkp1D_yk / dk_yk - step * gamma - tau_k; 742 /* Here is the requisite check */ 743 PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp)); 744 if (cg->neg_xi) { 745 /* modified KD implementation */ 746 if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk; 747 else { gamma = cg->xi * gd / dk_yk; } 748 if (beta < cg->zeta * tmp / (dnorm * dnorm)) { 749 beta = cg->zeta * tmp / (dnorm * dnorm); 750 gamma = 0.0; 751 } 752 } else { /* original KD 2015 implementation */ 753 if (beta < cg->zeta * tmp / (dnorm * dnorm)) { 754 beta = cg->zeta * tmp / (dnorm * dnorm); 755 gamma = 0.0; 756 } else { 757 gamma = cg->xi * gd / dk_yk; 758 } 759 } 760 /* Do the update in two steps */ 761 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 762 PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work)); 763 } 764 } 765 break; 766 767 case CG_SSML_BFGS: 768 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 769 Discussion Papers 269 (1977). */ 770 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 771 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 772 snorm = step * dnorm; 773 cg->yts = dk_yk * step; 774 cg->yty = ynorm2; 775 cg->sts = snorm * snorm; 776 if (!cg->diag_scaling) { 777 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 778 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 779 tmp = gd / dk_yk; 780 beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp; 781 /* d <- -t*g + beta*t*d + t*tmp*yk */ 782 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk)); 783 } else { 784 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 785 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 786 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 787 /* compute scalar gamma */ 788 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 789 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 790 gamma = gd / dk_yk; 791 /* Compute scalar beta */ 792 beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk; 793 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 794 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 795 } 796 break; 797 798 case CG_SSML_DFP: 799 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 800 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 801 snorm = step * dnorm; 802 cg->yts = dk_yk * step; 803 cg->yty = ynorm2; 804 cg->sts = snorm * snorm; 805 if (!cg->diag_scaling) { 806 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 807 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 808 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 809 tau_k = cg->dfp_scale * tau_k; 810 tmp = tau_k * gkp1_yk / cg->yty; 811 beta = -step * gd / dk_yk; 812 /* d <- -t*g + beta*d + tmp*yk */ 813 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 814 } else { 815 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 816 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 817 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 818 /* compute scalar gamma */ 819 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 820 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 821 gamma = (gkp1_yk / tmp); 822 /* Compute scalar beta */ 823 beta = -step * gd / dk_yk; 824 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 825 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 826 } 827 break; 828 829 case CG_SSML_BROYDEN: 830 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 831 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 832 snorm = step * dnorm; 833 cg->yts = step * dk_yk; 834 cg->yty = ynorm2; 835 cg->sts = snorm * snorm; 836 if (!cg->diag_scaling) { 837 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 838 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale)); 839 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale)); 840 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 841 tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp; 842 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 843 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 844 tmp = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty; 845 beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk; 846 /* d <- -t*g + beta*d + tmp*yk */ 847 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 848 } else { 849 /* We have diagonal scaling enabled */ 850 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 851 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 852 /* compute scalar gamma */ 853 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 854 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 855 gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp); 856 /* Compute scalar beta */ 857 beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk; 858 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 859 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 860 } 861 break; 862 863 default: break; 864 } 865 } 866 PetscFunctionReturn(0); 867 } 868 869 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) { 870 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 871 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 872 PetscReal step = 1.0, gnorm2, gd, dnorm = 0.0; 873 PetscReal gnorm2_old, f_old, resnorm, gnorm_old; 874 PetscBool pcgd_fallback = PETSC_FALSE; 875 876 PetscFunctionBegin; 877 /* We are now going to perform a line search along the direction. */ 878 /* Store solution and gradient info before it changes */ 879 PetscCall(VecCopy(tao->solution, cg->X_old)); 880 PetscCall(VecCopy(tao->gradient, cg->G_old)); 881 PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old)); 882 883 gnorm_old = gnorm; 884 gnorm2_old = gnorm_old * gnorm_old; 885 f_old = cg->f; 886 /* Perform bounded line search. If we are recycling a solution from a previous */ 887 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 888 if (!(tao->recycle && 0 == tao->niter)) { 889 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 890 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 891 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 892 PetscCall(TaoAddLineSearchCounts(tao)); 893 894 /* Check linesearch failure */ 895 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 896 ++cg->ls_fails; 897 if (cg->cg_type == CG_GradientDescent) { 898 /* Nothing left to do but fail out of the optimization */ 899 step = 0.0; 900 tao->reason = TAO_DIVERGED_LS_FAILURE; 901 } else { 902 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 903 PetscCall(VecCopy(cg->X_old, tao->solution)); 904 PetscCall(VecCopy(cg->G_old, tao->gradient)); 905 PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient)); 906 gnorm = gnorm_old; 907 gnorm2 = gnorm2_old; 908 cg->f = f_old; 909 910 /* Fall back on preconditioned CG (so long as you're not already using it) */ 911 if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) { 912 pcgd_fallback = PETSC_TRUE; 913 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 914 915 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 916 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 917 918 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 919 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 920 PetscCall(TaoAddLineSearchCounts(tao)); 921 922 pcgd_fallback = PETSC_FALSE; 923 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 924 /* Going to perform a regular gradient descent step. */ 925 ++cg->ls_fails; 926 step = 0.0; 927 } 928 } 929 /* Fall back on the scaled gradient step */ 930 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 931 ++cg->ls_fails; 932 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 933 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 934 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 935 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 936 PetscCall(TaoAddLineSearchCounts(tao)); 937 } 938 939 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 940 /* Nothing left to do but fail out of the optimization */ 941 ++cg->ls_fails; 942 step = 0.0; 943 tao->reason = TAO_DIVERGED_LS_FAILURE; 944 } else { 945 /* One of the fallbacks worked. Set them both back equal to false. */ 946 pcgd_fallback = PETSC_FALSE; 947 } 948 } 949 } 950 /* Convergence test for line search failure */ 951 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 952 953 /* Standard convergence test */ 954 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 955 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 956 PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 957 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 958 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 959 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 960 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 961 } 962 /* Assert we have an updated step and we need at least one more iteration. */ 963 /* Calculate the next direction */ 964 /* Estimate the active set at the new solution */ 965 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 966 /* Compute the projected gradient and its norm */ 967 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 968 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 969 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 970 gnorm2 = gnorm * gnorm; 971 972 /* Calculate some quantities used in the StepDirectionUpdate. */ 973 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 974 /* Update the step direction. */ 975 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 976 ++tao->niter; 977 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 978 979 if (cg->cg_type != CG_GradientDescent) { 980 /* Figure out which previously active variables became inactive this iteration */ 981 PetscCall(ISDestroy(&cg->new_inactives)); 982 if (cg->inactive_idx && cg->inactive_old) { PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives)); } 983 /* Selectively reset the CG step those freshly inactive variables */ 984 if (cg->new_inactives) { 985 PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 986 PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 987 PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step)); 988 PetscCall(VecScale(cg->inactive_step, -1.0)); 989 PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 990 PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 991 } 992 /* Verify that this is a descent direction */ 993 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 994 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 995 if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) { 996 /* Not a descent direction, so we reset back to projected gradient descent */ 997 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 998 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 999 ++cg->descent_error; 1000 } else { 1001 } 1002 } 1003 PetscFunctionReturn(0); 1004 } 1005 1006 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) { 1007 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 1008 1009 PetscFunctionBegin; 1010 PetscCall(PetscObjectReference((PetscObject)H0)); 1011 cg->pc = H0; 1012 PetscFunctionReturn(0); 1013 } 1014