1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/ 3 #include <petscksp.h> 4 5 #define CG_GradientDescent 0 6 #define CG_HestenesStiefel 1 7 #define CG_FletcherReeves 2 8 #define CG_PolakRibierePolyak 3 9 #define CG_PolakRibierePlus 4 10 #define CG_DaiYuan 5 11 #define CG_HagerZhang 6 12 #define CG_DaiKou 7 13 #define CG_KouDai 8 14 #define CG_SSML_BFGS 9 15 #define CG_SSML_DFP 10 16 #define CG_SSML_BROYDEN 11 17 #define CG_PCGradientDescent 12 18 #define CGTypes 13 19 20 static const char *CG_Table[64] = {"gd", "hs", "fr", "pr", "prp", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "pcgd"}; 21 22 #define CG_AS_NONE 0 23 #define CG_AS_BERTSEKAS 1 24 #define CG_AS_SIZE 2 25 26 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 27 28 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 29 { 30 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 31 32 PetscFunctionBegin; 33 PetscCall(ISDestroy(&cg->inactive_old)); 34 if (cg->inactive_idx) { 35 PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old)); 36 PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old)); 37 } 38 switch (asType) { 39 case CG_AS_NONE: 40 PetscCall(ISDestroy(&cg->inactive_idx)); 41 PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx)); 42 PetscCall(ISDestroy(&cg->active_idx)); 43 PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx)); 44 break; 45 46 case CG_AS_BERTSEKAS: 47 /* Use gradient descent to estimate the active set */ 48 PetscCall(VecCopy(cg->unprojected_gradient, cg->W)); 49 PetscCall(VecScale(cg->W, -1.0)); 50 PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx)); 51 break; 52 default: 53 break; 54 } 55 PetscFunctionReturn(0); 56 } 57 58 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 59 { 60 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 61 62 PetscFunctionBegin; 63 switch (asType) { 64 case CG_AS_NONE: 65 PetscCall(VecISSet(step, cg->active_idx, 0.0)); 66 break; 67 68 case CG_AS_BERTSEKAS: 69 PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step)); 70 break; 71 72 default: 73 break; 74 } 75 PetscFunctionReturn(0); 76 } 77 78 static PetscErrorCode TaoSolve_BNCG(Tao tao) 79 { 80 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 81 PetscReal step = 1.0, gnorm, gnorm2, resnorm; 82 PetscInt nDiff; 83 84 PetscFunctionBegin; 85 /* Project the current point onto the feasible set */ 86 PetscCall(TaoComputeVariableBounds(tao)); 87 PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU)); 88 89 /* Project the initial point onto the feasible region */ 90 PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution)); 91 92 if (nDiff > 0 || !tao->recycle) PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 93 PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm)); 94 PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 95 96 /* Estimate the active set and compute the projected gradient */ 97 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 98 99 /* Project the gradient and calculate the norm */ 100 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 101 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 102 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 103 gnorm2 = gnorm * gnorm; 104 105 /* Initialize counters */ 106 tao->niter = 0; 107 cg->ls_fails = cg->descent_error = 0; 108 cg->resets = -1; 109 cg->skipped_updates = cg->pure_gd_steps = 0; 110 cg->iter_quad = 0; 111 112 /* Convergence test at the starting point. */ 113 tao->reason = TAO_CONTINUE_ITERATING; 114 115 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 116 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 117 PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 118 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 119 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 120 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 121 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 122 /* Calculate initial direction. */ 123 if (!tao->recycle) { 124 /* We are not recycling a solution/history from a past TaoSolve */ 125 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 126 } 127 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 128 while (1) { 129 /* Call general purpose update function */ 130 if (tao->ops->update) { 131 PetscUseTypeMethod(tao, update, tao->niter, tao->user_update); 132 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 133 } 134 PetscCall(TaoBNCGConductIteration(tao, gnorm)); 135 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 136 } 137 } 138 139 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 140 { 141 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 142 143 PetscFunctionBegin; 144 if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 145 if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 146 if (!cg->W) PetscCall(VecDuplicate(tao->solution, &cg->W)); 147 if (!cg->work) PetscCall(VecDuplicate(tao->solution, &cg->work)); 148 if (!cg->sk) PetscCall(VecDuplicate(tao->solution, &cg->sk)); 149 if (!cg->yk) PetscCall(VecDuplicate(tao->gradient, &cg->yk)); 150 if (!cg->X_old) PetscCall(VecDuplicate(tao->solution, &cg->X_old)); 151 if (!cg->G_old) PetscCall(VecDuplicate(tao->gradient, &cg->G_old)); 152 if (cg->diag_scaling) { 153 PetscCall(VecDuplicate(tao->solution, &cg->d_work)); 154 PetscCall(VecDuplicate(tao->solution, &cg->y_work)); 155 PetscCall(VecDuplicate(tao->solution, &cg->g_work)); 156 } 157 if (!cg->unprojected_gradient) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient)); 158 if (!cg->unprojected_gradient_old) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old)); 159 PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk)); 160 if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc)); 161 PetscFunctionReturn(0); 162 } 163 164 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 165 { 166 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 167 168 PetscFunctionBegin; 169 if (tao->setupcalled) { 170 PetscCall(VecDestroy(&cg->W)); 171 PetscCall(VecDestroy(&cg->work)); 172 PetscCall(VecDestroy(&cg->X_old)); 173 PetscCall(VecDestroy(&cg->G_old)); 174 PetscCall(VecDestroy(&cg->unprojected_gradient)); 175 PetscCall(VecDestroy(&cg->unprojected_gradient_old)); 176 PetscCall(VecDestroy(&cg->g_work)); 177 PetscCall(VecDestroy(&cg->d_work)); 178 PetscCall(VecDestroy(&cg->y_work)); 179 PetscCall(VecDestroy(&cg->sk)); 180 PetscCall(VecDestroy(&cg->yk)); 181 } 182 PetscCall(ISDestroy(&cg->active_lower)); 183 PetscCall(ISDestroy(&cg->active_upper)); 184 PetscCall(ISDestroy(&cg->active_fixed)); 185 PetscCall(ISDestroy(&cg->active_idx)); 186 PetscCall(ISDestroy(&cg->inactive_idx)); 187 PetscCall(ISDestroy(&cg->inactive_old)); 188 PetscCall(ISDestroy(&cg->new_inactives)); 189 PetscCall(MatDestroy(&cg->B)); 190 if (cg->pc) PetscCall(MatDestroy(&cg->pc)); 191 PetscCall(PetscFree(tao->data)); 192 PetscFunctionReturn(0); 193 } 194 195 static PetscErrorCode TaoSetFromOptions_BNCG(Tao tao, PetscOptionItems *PetscOptionsObject) 196 { 197 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 198 199 PetscFunctionBegin; 200 PetscOptionsHeadBegin(PetscOptionsObject, "Nonlinear Conjugate Gradient method for unconstrained optimization"); 201 PetscCall(PetscOptionsEList("-tao_bncg_type", "cg formula", "", CG_Table, CGTypes, CG_Table[cg->cg_type], &cg->cg_type, NULL)); 202 if (cg->cg_type != CG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 203 if (CG_GradientDescent == cg->cg_type) { 204 cg->cg_type = CG_PCGradientDescent; 205 /* Set scaling equal to none or, at best, scalar scaling. */ 206 cg->unscaled_restart = PETSC_TRUE; 207 cg->diag_scaling = PETSC_FALSE; 208 } 209 PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->cg_type], &cg->cg_type, NULL)); 210 PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL)); 211 PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL)); 212 PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL)); 213 PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) Parameter in the KD method", "", cg->xi, &cg->xi, NULL)); 214 PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL)); 215 PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL)); 216 PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL)); 217 PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL)); 218 PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL)); 219 PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL)); 220 PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL)); 221 PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL)); 222 PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL)); 223 PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL)); 224 PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL)); 225 PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) Enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL)); 226 PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL)); 227 if (cg->no_scaling) { 228 cg->diag_scaling = PETSC_FALSE; 229 cg->alpha = -1.0; 230 } 231 if (cg->alpha == -1.0 && cg->cg_type == CG_KouDai && !cg->diag_scaling) { /* Some more default options that appear to be good. */ 232 cg->neg_xi = PETSC_TRUE; 233 } 234 PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) Use negative xi when it might be a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL)); 235 PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL)); 236 PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL)); 237 PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL)); 238 PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL)); 239 240 PetscOptionsHeadEnd(); 241 PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix)); 242 PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_")); 243 PetscCall(MatSetFromOptions(cg->B)); 244 PetscFunctionReturn(0); 245 } 246 247 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 248 { 249 PetscBool isascii; 250 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 251 252 PetscFunctionBegin; 253 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 254 if (isascii) { 255 PetscCall(PetscViewerASCIIPushTab(viewer)); 256 PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", CG_Table[cg->cg_type])); 257 PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates)); 258 PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets)); 259 PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps)); 260 PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error)); 261 PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails)); 262 if (cg->diag_scaling) { 263 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 264 if (isascii) { 265 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 266 PetscCall(MatView(cg->B, viewer)); 267 PetscCall(PetscViewerPopFormat(viewer)); 268 } 269 } 270 PetscCall(PetscViewerASCIIPopTab(viewer)); 271 } 272 PetscFunctionReturn(0); 273 } 274 275 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 276 { 277 PetscReal a, b, c, sig1, sig2; 278 279 PetscFunctionBegin; 280 *scale = 0.0; 281 if (1.0 == alpha) *scale = yts / yty; 282 else if (0.0 == alpha) *scale = sts / yts; 283 else if (-1.0 == alpha) *scale = 1.0; 284 else { 285 a = yty; 286 b = yts; 287 c = sts; 288 a *= alpha; 289 b *= -(2.0 * alpha - 1.0); 290 c *= alpha - 1.0; 291 sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a); 292 sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a); 293 /* accept the positive root as the scalar */ 294 if (sig1 > 0.0) *scale = sig1; 295 else if (sig2 > 0.0) *scale = sig2; 296 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 297 } 298 PetscFunctionReturn(0); 299 } 300 301 /*MC 302 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 303 304 Options Database Keys: 305 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 306 . -tao_bncg_eta <r> - restart tolerance 307 . -tao_bncg_type <taocg_type> - cg formula 308 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 309 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 310 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 311 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 312 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 313 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 314 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 315 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 316 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 317 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 318 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 319 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 320 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 321 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 322 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 323 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 324 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 325 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 326 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 327 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 328 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 329 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 330 331 Notes: 332 CG formulas are: 333 + "gd" - Gradient Descent 334 . "fr" - Fletcher-Reeves 335 . "pr" - Polak-Ribiere-Polyak 336 . "prp" - Polak-Ribiere-Plus 337 . "hs" - Hestenes-Steifel 338 . "dy" - Dai-Yuan 339 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 340 . "ssml_dfp" - Self-Scaling Memoryless DFP 341 . "ssml_brdn" - Self-Scaling Memoryless Broyden 342 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 343 . "dk" - Dai-Kou (2013) 344 - "kd" - Kou-Dai (2015) 345 346 Level: beginner 347 M*/ 348 349 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 350 { 351 TAO_BNCG *cg; 352 const char *morethuente_type = TAOLINESEARCHMT; 353 354 PetscFunctionBegin; 355 tao->ops->setup = TaoSetUp_BNCG; 356 tao->ops->solve = TaoSolve_BNCG; 357 tao->ops->view = TaoView_BNCG; 358 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 359 tao->ops->destroy = TaoDestroy_BNCG; 360 361 /* Override default settings (unless already changed) */ 362 if (!tao->max_it_changed) tao->max_it = 2000; 363 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 364 365 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 366 /* method. In particular, gtol should be less that 0.5; the value used in */ 367 /* Nocedal and Wright is 0.10. We use the default values for the */ 368 /* linesearch because it seems to work better. */ 369 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 370 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 371 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 372 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 373 374 PetscCall(PetscNew(&cg)); 375 tao->data = (void *)cg; 376 PetscCall(KSPInitializePackage()); 377 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 378 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 379 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 380 381 cg->pc = NULL; 382 383 cg->dk_eta = 0.5; 384 cg->hz_eta = 0.4; 385 cg->dynamic_restart = PETSC_FALSE; 386 cg->unscaled_restart = PETSC_FALSE; 387 cg->no_scaling = PETSC_FALSE; 388 cg->delta_min = 1e-7; 389 cg->delta_max = 100; 390 cg->theta = 1.0; 391 cg->hz_theta = 1.0; 392 cg->dfp_scale = 1.0; 393 cg->bfgs_scale = 1.0; 394 cg->zeta = 0.1; 395 cg->min_quad = 6; 396 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 397 cg->xi = 1.0; 398 cg->neg_xi = PETSC_TRUE; 399 cg->spaced_restart = PETSC_FALSE; 400 cg->tol_quad = 1e-8; 401 cg->as_step = 0.001; 402 cg->as_tol = 0.001; 403 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0 / 3.0); /* Just a little tighter*/ 404 cg->as_type = CG_AS_BERTSEKAS; 405 cg->cg_type = CG_SSML_BFGS; 406 cg->alpha = 1.0; 407 cg->diag_scaling = PETSC_TRUE; 408 PetscFunctionReturn(0); 409 } 410 411 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 412 { 413 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 414 PetscReal scaling; 415 416 PetscFunctionBegin; 417 ++cg->resets; 418 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 419 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 420 if (cg->unscaled_restart) { 421 scaling = 1.0; 422 ++cg->pure_gd_steps; 423 } 424 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 425 /* Also want to reset our diagonal scaling with each restart */ 426 if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 427 PetscFunctionReturn(0); 428 } 429 430 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 431 { 432 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 433 PetscReal quadinterp; 434 435 PetscFunctionBegin; 436 if (cg->f < cg->min_quad / 10) { 437 *dynrestart = PETSC_FALSE; 438 PetscFunctionReturn(0); 439 } /* just skip this since this strategy doesn't work well for functions near zero */ 440 quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old)); 441 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 442 else { 443 cg->iter_quad = 0; 444 *dynrestart = PETSC_FALSE; 445 } 446 if (cg->iter_quad >= cg->min_quad) { 447 cg->iter_quad = 0; 448 *dynrestart = PETSC_TRUE; 449 } 450 PetscFunctionReturn(0); 451 } 452 453 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) 454 { 455 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 456 PetscReal gamma = 1.0, tau_k, beta; 457 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd; 458 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 459 PetscInt dim; 460 PetscBool cg_restart = PETSC_FALSE; 461 PetscFunctionBegin; 462 463 /* Local curvature check to see if we need to restart */ 464 if (tao->niter >= 1 || tao->recycle) { 465 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 466 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 467 ynorm2 = ynorm * ynorm; 468 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 469 if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) { 470 cg_restart = PETSC_TRUE; 471 ++cg->skipped_updates; 472 } 473 if (cg->spaced_restart) { 474 PetscCall(VecGetSize(tao->gradient, &dim)); 475 if (tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE; 476 } 477 } 478 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 479 if (cg->spaced_restart) { 480 PetscCall(VecGetSize(tao->gradient, &dim)); 481 if (0 == tao->niter % (6 * dim)) cg_restart = PETSC_TRUE; 482 } 483 /* Compute the diagonal scaling vector if applicable */ 484 if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient)); 485 486 /* A note on diagonal scaling (to be added to paper): 487 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 488 must be derived as a preconditioned CG method rather than as 489 a Hessian initialization like in the Broyden methods. */ 490 491 /* In that case, one writes the objective function as 492 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 493 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 494 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 495 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 496 497 /* This yields questions like what the dot product d_k^T y_k 498 should look like. HZ mistakenly treats that as the same under 499 preconditioning, but that is not necessarily true. */ 500 501 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 502 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 503 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 504 NOT the same if our preconditioning matrix is updated between iterations. 505 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 506 507 /* Compute CG step direction */ 508 if (cg_restart) { 509 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 510 } else if (pcgd_fallback) { 511 /* Just like preconditioned CG */ 512 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 513 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 514 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 515 switch (cg->cg_type) { 516 case CG_PCGradientDescent: 517 if (!cg->diag_scaling) { 518 if (!cg->no_scaling) { 519 cg->sts = step * step * dnorm * dnorm; 520 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha)); 521 } else { 522 tau_k = 1.0; 523 ++cg->pure_gd_steps; 524 } 525 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient)); 526 } else { 527 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 528 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 529 } 530 break; 531 532 case CG_HestenesStiefel: 533 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 534 if (!cg->diag_scaling) { 535 cg->sts = step * step * dnorm * dnorm; 536 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 537 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha)); 538 beta = tau_k * gkp1_yk / dk_yk; 539 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 540 } else { 541 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 542 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 543 beta = gkp1_yk / dk_yk; 544 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 545 } 546 break; 547 548 case CG_FletcherReeves: 549 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 550 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 551 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 552 ynorm2 = ynorm * ynorm; 553 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 554 if (!cg->diag_scaling) { 555 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha)); 556 beta = tau_k * gnorm2 / gnorm2_old; 557 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 558 } else { 559 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */ 560 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 561 PetscCall(VecDot(tao->gradient, cg->g_work, &tmp)); 562 beta = tmp / gnorm2_old; 563 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 564 } 565 break; 566 567 case CG_PolakRibierePolyak: 568 snorm = step * dnorm; 569 if (!cg->diag_scaling) { 570 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 571 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 572 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha)); 573 beta = tau_k * gkp1_yk / gnorm2_old; 574 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 575 } else { 576 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); 577 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 578 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 579 beta = gkp1_yk / gnorm2_old; 580 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 581 } 582 break; 583 584 case CG_PolakRibierePlus: 585 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 586 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 587 ynorm2 = ynorm * ynorm; 588 if (!cg->diag_scaling) { 589 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 590 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 591 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha)); 592 beta = tau_k * gkp1_yk / gnorm2_old; 593 beta = PetscMax(beta, 0.0); 594 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 595 } else { 596 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */ 597 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 598 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 599 beta = gkp1_yk / gnorm2_old; 600 beta = PetscMax(beta, 0.0); 601 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 602 } 603 break; 604 605 case CG_DaiYuan: 606 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 607 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 608 if (!cg->diag_scaling) { 609 PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd)); 610 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 611 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha)); 612 beta = tau_k * gnorm2 / (gd - gd_old); 613 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 614 } else { 615 PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work)); 616 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 617 PetscCall(VecDot(cg->g_work, tao->gradient, >Dg)); 618 PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old)); 619 PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk)); 620 dk_yk = dk_yk - gd_old; 621 beta = gtDg / dk_yk; 622 PetscCall(VecScale(cg->d_work, beta)); 623 PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work)); 624 } 625 break; 626 627 case CG_HagerZhang: 628 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 629 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 630 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 631 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 632 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 633 snorm = dnorm * step; 634 cg->yts = step * dk_yk; 635 if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 636 if (cg->dynamic_restart) { 637 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 638 } else { 639 if (!cg->diag_scaling) { 640 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 641 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 642 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 643 tmp = gd / dk_yk; 644 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)); 645 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 646 beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm)); 647 /* d <- -t*g + beta*t*d */ 648 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 649 } else { 650 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 651 cg->yty = ynorm2; 652 cg->sts = snorm * snorm; 653 /* Apply the diagonal scaling to all my vectors */ 654 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 655 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 656 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 657 /* Construct the constant ytDgkp1 */ 658 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 659 /* Construct the constant for scaling Dkyk in the update */ 660 tmp = gd / dk_yk; 661 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 662 tau_k = -tau_k * gd / (dk_yk * dk_yk); 663 /* beta is the constant which adds the dk contribution */ 664 beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */ 665 /* From HZ2013, modified to account for diagonal scaling*/ 666 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 667 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 668 beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm)); 669 /* Do the update */ 670 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 671 } 672 } 673 break; 674 675 case CG_DaiKou: 676 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 677 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 678 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 679 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 680 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 681 snorm = step * dnorm; 682 cg->yts = dk_yk * step; 683 if (!cg->diag_scaling) { 684 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 685 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 686 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 687 tmp = gd / dk_yk; 688 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk; 689 beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm)); 690 /* d <- -t*g + beta*t*d */ 691 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk)); 692 } else { 693 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 694 cg->yty = ynorm2; 695 cg->sts = snorm * snorm; 696 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 697 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 698 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 699 /* Construct the constant ytDgkp1 */ 700 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 701 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 702 tau_k = tau_k * gd / (dk_yk * dk_yk); 703 tmp = gd / dk_yk; 704 /* beta is the constant which adds the dk contribution */ 705 beta = gkp1_yk / dk_yk - step * tmp - tau_k; 706 /* Update this for the last term in beta */ 707 PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk)); 708 beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */ 709 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 710 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 711 beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm)); 712 /* Do the update */ 713 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 714 } 715 break; 716 717 case CG_KouDai: 718 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 719 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 720 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 721 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 722 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 723 snorm = step * dnorm; 724 cg->yts = dk_yk * step; 725 if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 726 if (cg->dynamic_restart) { 727 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 728 } else { 729 if (!cg->diag_scaling) { 730 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 731 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 732 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk; 733 if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) /* 0.1 is KD's zeta parameter */ 734 { 735 beta = cg->zeta * tau_k * gd / (dnorm * dnorm); 736 gamma = 0.0; 737 } else { 738 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk; 739 /* This seems to be very effective when there's no tau_k scaling. 740 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 741 else gamma = cg->xi * gd / dk_yk; 742 } 743 /* d <- -t*g + beta*t*d + t*tmp*yk */ 744 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk)); 745 } else { 746 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 747 cg->yty = ynorm2; 748 cg->sts = snorm * snorm; 749 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 750 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 751 /* Construct the constant ytDgkp1 */ 752 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk)); 753 /* Construct the constant for scaling Dkyk in the update */ 754 gamma = gd / dk_yk; 755 /* tau_k = -ytDy/(ytd)^2 * gd */ 756 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 757 tau_k = tau_k * gd / (dk_yk * dk_yk); 758 /* beta is the constant which adds the d_k contribution */ 759 beta = gkp1D_yk / dk_yk - step * gamma - tau_k; 760 /* Here is the requisite check */ 761 PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp)); 762 if (cg->neg_xi) { 763 /* modified KD implementation */ 764 if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk; 765 else gamma = cg->xi * gd / dk_yk; 766 if (beta < cg->zeta * tmp / (dnorm * dnorm)) { 767 beta = cg->zeta * tmp / (dnorm * dnorm); 768 gamma = 0.0; 769 } 770 } else { /* original KD 2015 implementation */ 771 if (beta < cg->zeta * tmp / (dnorm * dnorm)) { 772 beta = cg->zeta * tmp / (dnorm * dnorm); 773 gamma = 0.0; 774 } else gamma = cg->xi * gd / dk_yk; 775 } 776 /* Do the update in two steps */ 777 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 778 PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work)); 779 } 780 } 781 break; 782 783 case CG_SSML_BFGS: 784 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 785 Discussion Papers 269 (1977). */ 786 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 787 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 788 snorm = step * dnorm; 789 cg->yts = dk_yk * step; 790 cg->yty = ynorm2; 791 cg->sts = snorm * snorm; 792 if (!cg->diag_scaling) { 793 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 794 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 795 tmp = gd / dk_yk; 796 beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp; 797 /* d <- -t*g + beta*t*d + t*tmp*yk */ 798 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk)); 799 } else { 800 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 801 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 802 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 803 /* compute scalar gamma */ 804 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 805 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 806 gamma = gd / dk_yk; 807 /* Compute scalar beta */ 808 beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk; 809 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 810 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 811 } 812 break; 813 814 case CG_SSML_DFP: 815 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 816 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 817 snorm = step * dnorm; 818 cg->yts = dk_yk * step; 819 cg->yty = ynorm2; 820 cg->sts = snorm * snorm; 821 if (!cg->diag_scaling) { 822 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 823 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 824 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 825 tau_k = cg->dfp_scale * tau_k; 826 tmp = tau_k * gkp1_yk / cg->yty; 827 beta = -step * gd / dk_yk; 828 /* d <- -t*g + beta*d + tmp*yk */ 829 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 830 } else { 831 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 832 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 833 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 834 /* compute scalar gamma */ 835 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 836 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 837 gamma = (gkp1_yk / tmp); 838 /* Compute scalar beta */ 839 beta = -step * gd / dk_yk; 840 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 841 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 842 } 843 break; 844 845 case CG_SSML_BROYDEN: 846 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 847 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 848 snorm = step * dnorm; 849 cg->yts = step * dk_yk; 850 cg->yty = ynorm2; 851 cg->sts = snorm * snorm; 852 if (!cg->diag_scaling) { 853 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 854 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale)); 855 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale)); 856 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 857 tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp; 858 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 859 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 860 tmp = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty; 861 beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk; 862 /* d <- -t*g + beta*d + tmp*yk */ 863 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 864 } else { 865 /* We have diagonal scaling enabled */ 866 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 867 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 868 /* compute scalar gamma */ 869 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 870 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 871 gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp); 872 /* Compute scalar beta */ 873 beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk; 874 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 875 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 876 } 877 break; 878 879 default: 880 break; 881 } 882 } 883 PetscFunctionReturn(0); 884 } 885 886 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 887 { 888 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 889 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 890 PetscReal step = 1.0, gnorm2, gd, dnorm = 0.0; 891 PetscReal gnorm2_old, f_old, resnorm, gnorm_old; 892 PetscBool pcgd_fallback = PETSC_FALSE; 893 894 PetscFunctionBegin; 895 /* We are now going to perform a line search along the direction. */ 896 /* Store solution and gradient info before it changes */ 897 PetscCall(VecCopy(tao->solution, cg->X_old)); 898 PetscCall(VecCopy(tao->gradient, cg->G_old)); 899 PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old)); 900 901 gnorm_old = gnorm; 902 gnorm2_old = gnorm_old * gnorm_old; 903 f_old = cg->f; 904 /* Perform bounded line search. If we are recycling a solution from a previous */ 905 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 906 if (!(tao->recycle && 0 == tao->niter)) { 907 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 908 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 909 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 910 PetscCall(TaoAddLineSearchCounts(tao)); 911 912 /* Check linesearch failure */ 913 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 914 ++cg->ls_fails; 915 if (cg->cg_type == CG_GradientDescent) { 916 /* Nothing left to do but fail out of the optimization */ 917 step = 0.0; 918 tao->reason = TAO_DIVERGED_LS_FAILURE; 919 } else { 920 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 921 PetscCall(VecCopy(cg->X_old, tao->solution)); 922 PetscCall(VecCopy(cg->G_old, tao->gradient)); 923 PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient)); 924 gnorm = gnorm_old; 925 gnorm2 = gnorm2_old; 926 cg->f = f_old; 927 928 /* Fall back on preconditioned CG (so long as you're not already using it) */ 929 if (cg->cg_type != CG_PCGradientDescent && cg->diag_scaling) { 930 pcgd_fallback = PETSC_TRUE; 931 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 932 933 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 934 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 935 936 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 937 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 938 PetscCall(TaoAddLineSearchCounts(tao)); 939 940 pcgd_fallback = PETSC_FALSE; 941 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 942 /* Going to perform a regular gradient descent step. */ 943 ++cg->ls_fails; 944 step = 0.0; 945 } 946 } 947 /* Fall back on the scaled gradient step */ 948 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 949 ++cg->ls_fails; 950 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 951 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 952 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 953 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 954 PetscCall(TaoAddLineSearchCounts(tao)); 955 } 956 957 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 958 /* Nothing left to do but fail out of the optimization */ 959 ++cg->ls_fails; 960 step = 0.0; 961 tao->reason = TAO_DIVERGED_LS_FAILURE; 962 } else { 963 /* One of the fallbacks worked. Set them both back equal to false. */ 964 pcgd_fallback = PETSC_FALSE; 965 } 966 } 967 } 968 /* Convergence test for line search failure */ 969 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 970 971 /* Standard convergence test */ 972 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 973 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 974 PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 975 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 976 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 977 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 978 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0); 979 } 980 /* Assert we have an updated step and we need at least one more iteration. */ 981 /* Calculate the next direction */ 982 /* Estimate the active set at the new solution */ 983 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 984 /* Compute the projected gradient and its norm */ 985 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 986 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 987 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 988 gnorm2 = gnorm * gnorm; 989 990 /* Calculate some quantities used in the StepDirectionUpdate. */ 991 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 992 /* Update the step direction. */ 993 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 994 ++tao->niter; 995 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 996 997 if (cg->cg_type != CG_GradientDescent) { 998 /* Figure out which previously active variables became inactive this iteration */ 999 PetscCall(ISDestroy(&cg->new_inactives)); 1000 if (cg->inactive_idx && cg->inactive_old) PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives)); 1001 /* Selectively reset the CG step those freshly inactive variables */ 1002 if (cg->new_inactives) { 1003 PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1004 PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1005 PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step)); 1006 PetscCall(VecScale(cg->inactive_step, -1.0)); 1007 PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 1008 PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 1009 } 1010 /* Verify that this is a descent direction */ 1011 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 1012 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 1013 if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) { 1014 /* Not a descent direction, so we reset back to projected gradient descent */ 1015 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 1016 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1017 ++cg->descent_error; 1018 } else { 1019 } 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 1024 PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) 1025 { 1026 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 1027 1028 PetscFunctionBegin; 1029 PetscCall(PetscObjectReference((PetscObject)H0)); 1030 cg->pc = H0; 1031 PetscFunctionReturn(0); 1032 } 1033