1 #include <petsctaolinesearch.h> 2 #include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/ 3 #include <petscksp.h> 4 5 const char *const TaoBNCGTypes[] = {"gd", "pcgd", "hs", "fr", "prp", "prp_plus", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "TAOBNCGType", "TAO_BNCG_", NULL}; 6 7 #define CG_AS_NONE 0 8 #define CG_AS_BERTSEKAS 1 9 #define CG_AS_SIZE 2 10 11 static const char *CG_AS_TYPE[64] = {"none", "bertsekas"}; 12 13 PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType) 14 { 15 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 16 17 PetscFunctionBegin; 18 PetscCall(ISDestroy(&cg->inactive_old)); 19 if (cg->inactive_idx) { 20 PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old)); 21 PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old)); 22 } 23 switch (asType) { 24 case CG_AS_NONE: 25 PetscCall(ISDestroy(&cg->inactive_idx)); 26 PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx)); 27 PetscCall(ISDestroy(&cg->active_idx)); 28 PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx)); 29 break; 30 31 case CG_AS_BERTSEKAS: 32 /* Use gradient descent to estimate the active set */ 33 PetscCall(VecCopy(cg->unprojected_gradient, cg->W)); 34 PetscCall(VecScale(cg->W, -1.0)); 35 PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx)); 36 break; 37 default: 38 break; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step) 44 { 45 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 46 47 PetscFunctionBegin; 48 switch (asType) { 49 case CG_AS_NONE: 50 PetscCall(VecISSet(step, cg->active_idx, 0.0)); 51 break; 52 53 case CG_AS_BERTSEKAS: 54 PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step)); 55 break; 56 57 default: 58 break; 59 } 60 PetscFunctionReturn(PETSC_SUCCESS); 61 } 62 63 static PetscErrorCode TaoSolve_BNCG(Tao tao) 64 { 65 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 66 PetscReal step = 1.0, gnorm, gnorm2, resnorm; 67 PetscInt nDiff; 68 69 PetscFunctionBegin; 70 /* Project the current point onto the feasible set */ 71 PetscCall(TaoComputeVariableBounds(tao)); 72 PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU)); 73 74 /* Project the initial point onto the feasible region */ 75 PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution)); 76 77 if (nDiff > 0 || !tao->recycle) PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 78 PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm)); 79 PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 80 81 /* Estimate the active set and compute the projected gradient */ 82 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 83 84 /* Project the gradient and calculate the norm */ 85 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 86 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 87 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 88 gnorm2 = gnorm * gnorm; 89 90 /* Initialize counters */ 91 tao->niter = 0; 92 cg->ls_fails = cg->descent_error = 0; 93 cg->resets = -1; 94 cg->skipped_updates = cg->pure_gd_steps = 0; 95 cg->iter_quad = 0; 96 97 /* Convergence test at the starting point. */ 98 tao->reason = TAO_CONTINUE_ITERATING; 99 100 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 101 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 102 PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 103 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 104 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 105 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 106 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS); 107 /* Calculate initial direction. */ 108 if (!tao->recycle) { 109 /* We are not recycling a solution/history from a past TaoSolve */ 110 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 111 } 112 /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */ 113 while (1) { 114 /* Call general purpose update function */ 115 if (tao->ops->update) { 116 PetscUseTypeMethod(tao, update, tao->niter, tao->user_update); 117 PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient)); 118 } 119 PetscCall(TaoBNCGConductIteration(tao, gnorm)); 120 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 } 123 124 static PetscErrorCode TaoSetUp_BNCG(Tao tao) 125 { 126 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 127 128 PetscFunctionBegin; 129 if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient)); 130 if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection)); 131 if (!cg->W) PetscCall(VecDuplicate(tao->solution, &cg->W)); 132 if (!cg->work) PetscCall(VecDuplicate(tao->solution, &cg->work)); 133 if (!cg->sk) PetscCall(VecDuplicate(tao->solution, &cg->sk)); 134 if (!cg->yk) PetscCall(VecDuplicate(tao->gradient, &cg->yk)); 135 if (!cg->X_old) PetscCall(VecDuplicate(tao->solution, &cg->X_old)); 136 if (!cg->G_old) PetscCall(VecDuplicate(tao->gradient, &cg->G_old)); 137 if (cg->diag_scaling) { 138 PetscCall(VecDuplicate(tao->solution, &cg->d_work)); 139 PetscCall(VecDuplicate(tao->solution, &cg->y_work)); 140 PetscCall(VecDuplicate(tao->solution, &cg->g_work)); 141 } 142 if (!cg->unprojected_gradient) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient)); 143 if (!cg->unprojected_gradient_old) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old)); 144 PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk)); 145 if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc)); 146 PetscFunctionReturn(PETSC_SUCCESS); 147 } 148 149 static PetscErrorCode TaoDestroy_BNCG(Tao tao) 150 { 151 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 152 153 PetscFunctionBegin; 154 if (tao->setupcalled) { 155 PetscCall(VecDestroy(&cg->W)); 156 PetscCall(VecDestroy(&cg->work)); 157 PetscCall(VecDestroy(&cg->X_old)); 158 PetscCall(VecDestroy(&cg->G_old)); 159 PetscCall(VecDestroy(&cg->unprojected_gradient)); 160 PetscCall(VecDestroy(&cg->unprojected_gradient_old)); 161 PetscCall(VecDestroy(&cg->g_work)); 162 PetscCall(VecDestroy(&cg->d_work)); 163 PetscCall(VecDestroy(&cg->y_work)); 164 PetscCall(VecDestroy(&cg->sk)); 165 PetscCall(VecDestroy(&cg->yk)); 166 } 167 PetscCall(ISDestroy(&cg->active_lower)); 168 PetscCall(ISDestroy(&cg->active_upper)); 169 PetscCall(ISDestroy(&cg->active_fixed)); 170 PetscCall(ISDestroy(&cg->active_idx)); 171 PetscCall(ISDestroy(&cg->inactive_idx)); 172 PetscCall(ISDestroy(&cg->inactive_old)); 173 PetscCall(ISDestroy(&cg->new_inactives)); 174 PetscCall(MatDestroy(&cg->B)); 175 if (cg->pc) PetscCall(MatDestroy(&cg->pc)); 176 PetscCall(PetscFree(tao->data)); 177 PetscFunctionReturn(PETSC_SUCCESS); 178 } 179 180 static PetscErrorCode TaoSetFromOptions_BNCG(Tao tao, PetscOptionItems *PetscOptionsObject) 181 { 182 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 183 184 PetscFunctionBegin; 185 PetscOptionsHeadBegin(PetscOptionsObject, "Nonlinear Conjugate Gradient method for unconstrained optimization"); 186 PetscCall(PetscOptionsEnum("-tao_bncg_type", "CG update formula", "TaoBNCGTypes", TaoBNCGTypes, (PetscEnum)cg->cg_type, (PetscEnum *)&cg->cg_type, NULL)); 187 if (cg->cg_type != TAO_BNCG_SSML_BFGS) cg->alpha = -1.0; /* Setting defaults for non-BFGS methods. User can change it below. */ 188 if (TAO_BNCG_GD == cg->cg_type) { 189 cg->cg_type = TAO_BNCG_PCGD; 190 /* Set scaling equal to none or, at best, scalar scaling. */ 191 cg->unscaled_restart = PETSC_TRUE; 192 cg->diag_scaling = PETSC_FALSE; 193 } 194 PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL)); 195 PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL)); 196 PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL)); 197 PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) Parameter in the KD method", "", cg->xi, &cg->xi, NULL)); 198 PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL)); 199 PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL)); 200 PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL)); 201 PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL)); 202 PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for bfgs/brdn CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL)); 203 PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL)); 204 PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL)); 205 PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL)); 206 PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) Free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL)); 207 PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) Number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL)); 208 PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) Number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL)); 209 PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) Enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL)); 210 PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL)); 211 if (cg->no_scaling) { 212 cg->diag_scaling = PETSC_FALSE; 213 cg->alpha = -1.0; 214 } 215 if (cg->alpha == -1.0 && cg->cg_type == TAO_BNCG_KD && !cg->diag_scaling) { /* Some more default options that appear to be good. */ 216 cg->neg_xi = PETSC_TRUE; 217 } 218 PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) Use negative xi when it might be a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL)); 219 PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type, NULL)); 220 PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL)); 221 PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL)); 222 PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL)); 223 PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL)); 224 225 PetscOptionsHeadEnd(); 226 PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix)); 227 PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_")); 228 PetscCall(MatSetFromOptions(cg->B)); 229 PetscFunctionReturn(PETSC_SUCCESS); 230 } 231 232 static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer) 233 { 234 PetscBool isascii; 235 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 236 237 PetscFunctionBegin; 238 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 239 if (isascii) { 240 PetscCall(PetscViewerASCIIPushTab(viewer)); 241 PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", TaoBNCGTypes[cg->cg_type])); 242 PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates)); 243 PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets)); 244 PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps)); 245 PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error)); 246 PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails)); 247 if (cg->diag_scaling) { 248 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 249 if (isascii) { 250 PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO)); 251 PetscCall(MatView(cg->B, viewer)); 252 PetscCall(PetscViewerPopFormat(viewer)); 253 } 254 } 255 PetscCall(PetscViewerASCIIPopTab(viewer)); 256 } 257 PetscFunctionReturn(PETSC_SUCCESS); 258 } 259 260 PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha) 261 { 262 PetscReal a, b, c, sig1, sig2; 263 264 PetscFunctionBegin; 265 *scale = 0.0; 266 if (1.0 == alpha) *scale = yts / yty; 267 else if (0.0 == alpha) *scale = sts / yts; 268 else if (-1.0 == alpha) *scale = 1.0; 269 else { 270 a = yty; 271 b = yts; 272 c = sts; 273 a *= alpha; 274 b *= -(2.0 * alpha - 1.0); 275 c *= alpha - 1.0; 276 sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a); 277 sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a); 278 /* accept the positive root as the scalar */ 279 if (sig1 > 0.0) *scale = sig1; 280 else if (sig2 > 0.0) *scale = sig2; 281 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar"); 282 } 283 PetscFunctionReturn(PETSC_SUCCESS); 284 } 285 286 /*MC 287 TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method. 288 289 Options Database Keys: 290 + -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled) 291 . -tao_bncg_eta <r> - restart tolerance 292 . -tao_bncg_type <taocg_type> - cg formula 293 . -tao_bncg_as_type <none,bertsekas> - active set estimation method 294 . -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation 295 . -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation 296 . -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on steplength each iteration, as well as determining whether or not we continue using the last stepdirection. Defaults to machine precision. 297 . -tao_bncg_theta <r> - convex combination parameter for the Broyden method 298 . -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 299 . -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods 300 . -tao_bncg_xi <r> - Multiplicative constant of the gamma term in the KD method 301 . -tao_bncg_hz_theta <r> - Multiplicative constant of the theta term for the HZ method 302 . -tao_bncg_bfgs_scale <r> - Scaling parameter of the bfgs contribution to the scalar Broyden method 303 . -tao_bncg_dfp_scale <r> - Scaling parameter of the dfp contribution to the scalar Broyden method 304 . -tao_bncg_diag_scaling <b> - Whether or not to use diagonal initialization/preconditioning for the CG methods. Default True. 305 . -tao_bncg_dynamic_restart <b> - use dynamic restart strategy in the HZ, DK, KD methods 306 . -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts 307 . -tao_bncg_zeta <r> - Scaling parameter in the KD method 308 . -tao_bncg_delta_min <r> - Minimum bound for rescaling during restarted gradient descent steps 309 . -tao_bncg_delta_max <r> - Maximum bound for rescaling during restarted gradient descent steps 310 . -tao_bncg_min_quad <i> - Number of quadratic-like steps in a row necessary to do a dynamic restart 311 . -tao_bncg_min_restart_num <i> - This number, x, makes sure there is a gradient descent step every x*n iterations, where n is the dimension of the problem 312 . -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations 313 . -tao_bncg_no_scaling <b> - If true, eliminates all scaling, including defaults. 314 - -tao_bncg_neg_xi <b> - Whether or not to use negative xi in the KD method under certain conditions 315 316 Notes: 317 CG formulas are: 318 + "gd" - Gradient Descent 319 . "fr" - Fletcher-Reeves 320 . "pr" - Polak-Ribiere-Polyak 321 . "prp" - Polak-Ribiere-Plus 322 . "hs" - Hestenes-Steifel 323 . "dy" - Dai-Yuan 324 . "ssml_bfgs" - Self-Scaling Memoryless BFGS 325 . "ssml_dfp" - Self-Scaling Memoryless DFP 326 . "ssml_brdn" - Self-Scaling Memoryless Broyden 327 . "hz" - Hager-Zhang (CG_DESCENT 5.3) 328 . "dk" - Dai-Kou (2013) 329 - "kd" - Kou-Dai (2015) 330 331 Level: beginner 332 M*/ 333 334 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 335 { 336 TAO_BNCG *cg; 337 const char *morethuente_type = TAOLINESEARCHMT; 338 339 PetscFunctionBegin; 340 tao->ops->setup = TaoSetUp_BNCG; 341 tao->ops->solve = TaoSolve_BNCG; 342 tao->ops->view = TaoView_BNCG; 343 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 344 tao->ops->destroy = TaoDestroy_BNCG; 345 346 /* Override default settings (unless already changed) */ 347 if (!tao->max_it_changed) tao->max_it = 2000; 348 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 349 350 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 351 /* method. In particular, gtol should be less that 0.5; the value used in */ 352 /* Nocedal and Wright is 0.10. We use the default values for the */ 353 /* linesearch because it seems to work better. */ 354 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 355 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 356 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 357 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 358 359 PetscCall(PetscNew(&cg)); 360 tao->data = (void *)cg; 361 PetscCall(KSPInitializePackage()); 362 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 363 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 364 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 365 366 cg->pc = NULL; 367 368 cg->dk_eta = 0.5; 369 cg->hz_eta = 0.4; 370 cg->dynamic_restart = PETSC_FALSE; 371 cg->unscaled_restart = PETSC_FALSE; 372 cg->no_scaling = PETSC_FALSE; 373 cg->delta_min = 1e-7; 374 cg->delta_max = 100; 375 cg->theta = 1.0; 376 cg->hz_theta = 1.0; 377 cg->dfp_scale = 1.0; 378 cg->bfgs_scale = 1.0; 379 cg->zeta = 0.1; 380 cg->min_quad = 6; 381 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 382 cg->xi = 1.0; 383 cg->neg_xi = PETSC_TRUE; 384 cg->spaced_restart = PETSC_FALSE; 385 cg->tol_quad = 1e-8; 386 cg->as_step = 0.001; 387 cg->as_tol = 0.001; 388 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0 / 3.0); /* Just a little tighter*/ 389 cg->as_type = CG_AS_BERTSEKAS; 390 cg->cg_type = TAO_BNCG_SSML_BFGS; 391 cg->alpha = 1.0; 392 cg->diag_scaling = PETSC_TRUE; 393 PetscFunctionReturn(PETSC_SUCCESS); 394 } 395 396 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 397 { 398 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 399 PetscReal scaling; 400 401 PetscFunctionBegin; 402 ++cg->resets; 403 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 404 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 405 if (cg->unscaled_restart) { 406 scaling = 1.0; 407 ++cg->pure_gd_steps; 408 } 409 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 410 /* Also want to reset our diagonal scaling with each restart */ 411 if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 412 PetscFunctionReturn(PETSC_SUCCESS); 413 } 414 415 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 416 { 417 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 418 PetscReal quadinterp; 419 420 PetscFunctionBegin; 421 if (cg->f < cg->min_quad / 10) { 422 *dynrestart = PETSC_FALSE; 423 PetscFunctionReturn(PETSC_SUCCESS); 424 } /* just skip this since this strategy doesn't work well for functions near zero */ 425 quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old)); 426 if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad; 427 else { 428 cg->iter_quad = 0; 429 *dynrestart = PETSC_FALSE; 430 } 431 if (cg->iter_quad >= cg->min_quad) { 432 cg->iter_quad = 0; 433 *dynrestart = PETSC_TRUE; 434 } 435 PetscFunctionReturn(PETSC_SUCCESS); 436 } 437 438 PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback) 439 { 440 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 441 PetscReal gamma = 1.0, tau_k, beta; 442 PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd; 443 PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg; 444 PetscInt dim; 445 PetscBool cg_restart = PETSC_FALSE; 446 PetscFunctionBegin; 447 448 /* Local curvature check to see if we need to restart */ 449 if (tao->niter >= 1 || tao->recycle) { 450 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 451 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 452 ynorm2 = ynorm * ynorm; 453 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 454 if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) { 455 cg_restart = PETSC_TRUE; 456 ++cg->skipped_updates; 457 } 458 if (cg->spaced_restart) { 459 PetscCall(VecGetSize(tao->gradient, &dim)); 460 if (tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE; 461 } 462 } 463 /* If the user wants regular restarts, do it every 6n iterations, where n=dimension */ 464 if (cg->spaced_restart) { 465 PetscCall(VecGetSize(tao->gradient, &dim)); 466 if (0 == tao->niter % (6 * dim)) cg_restart = PETSC_TRUE; 467 } 468 /* Compute the diagonal scaling vector if applicable */ 469 if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient)); 470 471 /* A note on diagonal scaling (to be added to paper): 472 For the FR, PR, PRP, and DY methods, the diagonally scaled versions 473 must be derived as a preconditioned CG method rather than as 474 a Hessian initialization like in the Broyden methods. */ 475 476 /* In that case, one writes the objective function as 477 f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k). 478 Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to 479 HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the 480 same under preconditioning. Note that A is diagonal, such that A^T = A. */ 481 482 /* This yields questions like what the dot product d_k^T y_k 483 should look like. HZ mistakenly treats that as the same under 484 preconditioning, but that is not necessarily true. */ 485 486 /* Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation, 487 we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}), 488 yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is 489 NOT the same if our preconditioning matrix is updated between iterations. 490 This same issue is found when considering dot products of the form g_{k+1}^T y_k. */ 491 492 /* Compute CG step direction */ 493 if (cg_restart) { 494 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 495 } else if (pcgd_fallback) { 496 /* Just like preconditioned CG */ 497 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 498 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 499 } else if (ynorm2 > PETSC_MACHINE_EPSILON) { 500 switch (cg->cg_type) { 501 case TAO_BNCG_PCGD: 502 if (!cg->diag_scaling) { 503 if (!cg->no_scaling) { 504 cg->sts = step * step * dnorm * dnorm; 505 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha)); 506 } else { 507 tau_k = 1.0; 508 ++cg->pure_gd_steps; 509 } 510 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient)); 511 } else { 512 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 513 PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work)); 514 } 515 break; 516 517 case TAO_BNCG_HS: 518 /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */ 519 if (!cg->diag_scaling) { 520 cg->sts = step * step * dnorm * dnorm; 521 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 522 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha)); 523 beta = tau_k * gkp1_yk / dk_yk; 524 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 525 } else { 526 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 527 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 528 beta = gkp1_yk / dk_yk; 529 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 530 } 531 break; 532 533 case TAO_BNCG_FR: 534 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 535 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 536 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 537 ynorm2 = ynorm * ynorm; 538 PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk)); 539 if (!cg->diag_scaling) { 540 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha)); 541 beta = tau_k * gnorm2 / gnorm2_old; 542 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 543 } else { 544 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */ 545 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 546 PetscCall(VecDot(tao->gradient, cg->g_work, &tmp)); 547 beta = tmp / gnorm2_old; 548 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 549 } 550 break; 551 552 case TAO_BNCG_PRP: 553 snorm = step * dnorm; 554 if (!cg->diag_scaling) { 555 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 556 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 557 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha)); 558 beta = tau_k * gkp1_yk / gnorm2_old; 559 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 560 } else { 561 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); 562 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 563 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 564 beta = gkp1_yk / gnorm2_old; 565 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 566 } 567 break; 568 569 case TAO_BNCG_PRP_PLUS: 570 PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient)); 571 PetscCall(VecNorm(cg->yk, NORM_2, &ynorm)); 572 ynorm2 = ynorm * ynorm; 573 if (!cg->diag_scaling) { 574 PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old)); 575 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 576 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha)); 577 beta = tau_k * gkp1_yk / gnorm2_old; 578 beta = PetscMax(beta, 0.0); 579 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 580 } else { 581 PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */ 582 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 583 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 584 beta = gkp1_yk / gnorm2_old; 585 beta = PetscMax(beta, 0.0); 586 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 587 } 588 break; 589 590 case TAO_BNCG_DY: 591 /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property." 592 SIAM Journal on optimization 10, no. 1 (1999): 177-182. */ 593 if (!cg->diag_scaling) { 594 PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd)); 595 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 596 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha)); 597 beta = tau_k * gnorm2 / (gd - gd_old); 598 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 599 } else { 600 PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work)); 601 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 602 PetscCall(VecDot(cg->g_work, tao->gradient, >Dg)); 603 PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old)); 604 PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk)); 605 dk_yk = dk_yk - gd_old; 606 beta = gtDg / dk_yk; 607 PetscCall(VecScale(cg->d_work, beta)); 608 PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work)); 609 } 610 break; 611 612 case TAO_BNCG_HZ: 613 /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent." 614 ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */ 615 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 616 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 617 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 618 snorm = dnorm * step; 619 cg->yts = step * dk_yk; 620 if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 621 if (cg->dynamic_restart) { 622 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 623 } else { 624 if (!cg->diag_scaling) { 625 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 626 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 627 /* Supplying cg->alpha = -1.0 will give the CG_DESCENT 5.3 special case of tau_k = 1.0 */ 628 tmp = gd / dk_yk; 629 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)); 630 /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */ 631 beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm)); 632 /* d <- -t*g + beta*t*d */ 633 PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient)); 634 } else { 635 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 636 cg->yty = ynorm2; 637 cg->sts = snorm * snorm; 638 /* Apply the diagonal scaling to all my vectors */ 639 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 640 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 641 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 642 /* Construct the constant ytDgkp1 */ 643 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 644 /* Construct the constant for scaling Dkyk in the update */ 645 tmp = gd / dk_yk; 646 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 647 tau_k = -tau_k * gd / (dk_yk * dk_yk); 648 /* beta is the constant which adds the dk contribution */ 649 beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */ 650 /* From HZ2013, modified to account for diagonal scaling*/ 651 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 652 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 653 beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm)); 654 /* Do the update */ 655 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 656 } 657 } 658 break; 659 660 case TAO_BNCG_DK: 661 /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search." 662 SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */ 663 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 664 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 665 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 666 snorm = step * dnorm; 667 cg->yts = dk_yk * step; 668 if (!cg->diag_scaling) { 669 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 670 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 671 /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */ 672 tmp = gd / dk_yk; 673 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk; 674 beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm)); 675 /* d <- -t*g + beta*t*d */ 676 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk)); 677 } else { 678 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 679 cg->yty = ynorm2; 680 cg->sts = snorm * snorm; 681 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 682 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 683 PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work)); 684 /* Construct the constant ytDgkp1 */ 685 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk)); 686 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 687 tau_k = tau_k * gd / (dk_yk * dk_yk); 688 tmp = gd / dk_yk; 689 /* beta is the constant which adds the dk contribution */ 690 beta = gkp1_yk / dk_yk - step * tmp - tau_k; 691 /* Update this for the last term in beta */ 692 PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk)); 693 beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */ 694 PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd)); 695 PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old)); 696 beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm)); 697 /* Do the update */ 698 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 699 } 700 break; 701 702 case TAO_BNCG_KD: 703 /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization." 704 Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */ 705 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 706 PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old)); 707 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 708 snorm = step * dnorm; 709 cg->yts = dk_yk * step; 710 if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold)); 711 if (cg->dynamic_restart) { 712 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 713 } else { 714 if (!cg->diag_scaling) { 715 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 716 PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha)); 717 beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk; 718 if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) /* 0.1 is KD's zeta parameter */ 719 { 720 beta = cg->zeta * tau_k * gd / (dnorm * dnorm); 721 gamma = 0.0; 722 } else { 723 if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk; 724 /* This seems to be very effective when there's no tau_k scaling. 725 This guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi */ 726 else gamma = cg->xi * gd / dk_yk; 727 } 728 /* d <- -t*g + beta*t*d + t*tmp*yk */ 729 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk)); 730 } else { 731 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 732 cg->yty = ynorm2; 733 cg->sts = snorm * snorm; 734 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 735 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 736 /* Construct the constant ytDgkp1 */ 737 PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk)); 738 /* Construct the constant for scaling Dkyk in the update */ 739 gamma = gd / dk_yk; 740 /* tau_k = -ytDy/(ytd)^2 * gd */ 741 PetscCall(VecDot(cg->yk, cg->y_work, &tau_k)); 742 tau_k = tau_k * gd / (dk_yk * dk_yk); 743 /* beta is the constant which adds the d_k contribution */ 744 beta = gkp1D_yk / dk_yk - step * gamma - tau_k; 745 /* Here is the requisite check */ 746 PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp)); 747 if (cg->neg_xi) { 748 /* modified KD implementation */ 749 if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk; 750 else gamma = cg->xi * gd / dk_yk; 751 if (beta < cg->zeta * tmp / (dnorm * dnorm)) { 752 beta = cg->zeta * tmp / (dnorm * dnorm); 753 gamma = 0.0; 754 } 755 } else { /* original KD 2015 implementation */ 756 if (beta < cg->zeta * tmp / (dnorm * dnorm)) { 757 beta = cg->zeta * tmp / (dnorm * dnorm); 758 gamma = 0.0; 759 } else gamma = cg->xi * gd / dk_yk; 760 } 761 /* Do the update in two steps */ 762 PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work)); 763 PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work)); 764 } 765 } 766 break; 767 768 case TAO_BNCG_SSML_BFGS: 769 /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory." 770 Discussion Papers 269 (1977). */ 771 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 772 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 773 snorm = step * dnorm; 774 cg->yts = dk_yk * step; 775 cg->yty = ynorm2; 776 cg->sts = snorm * snorm; 777 if (!cg->diag_scaling) { 778 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 779 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 780 tmp = gd / dk_yk; 781 beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp; 782 /* d <- -t*g + beta*t*d + t*tmp*yk */ 783 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk)); 784 } else { 785 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */ 786 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 787 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 788 /* compute scalar gamma */ 789 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 790 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 791 gamma = gd / dk_yk; 792 /* Compute scalar beta */ 793 beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk; 794 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 795 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 796 } 797 break; 798 799 case TAO_BNCG_SSML_DFP: 800 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 801 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 802 snorm = step * dnorm; 803 cg->yts = dk_yk * step; 804 cg->yty = ynorm2; 805 cg->sts = snorm * snorm; 806 if (!cg->diag_scaling) { 807 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 808 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha)); 809 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 810 tau_k = cg->dfp_scale * tau_k; 811 tmp = tau_k * gkp1_yk / cg->yty; 812 beta = -step * gd / dk_yk; 813 /* d <- -t*g + beta*d + tmp*yk */ 814 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 815 } else { 816 /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */ 817 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 818 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 819 /* compute scalar gamma */ 820 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 821 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 822 gamma = (gkp1_yk / tmp); 823 /* Compute scalar beta */ 824 beta = -step * gd / dk_yk; 825 /* Compute stepdirection d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 826 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 827 } 828 break; 829 830 case TAO_BNCG_SSML_BRDN: 831 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 832 PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution)); 833 snorm = step * dnorm; 834 cg->yts = step * dk_yk; 835 cg->yty = ynorm2; 836 cg->sts = snorm * snorm; 837 if (!cg->diag_scaling) { 838 /* Instead of a regular convex combination, we will solve a quadratic formula. */ 839 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale)); 840 PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale)); 841 PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk)); 842 tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp; 843 /* If bfgs_scale = 1.0, it should reproduce the bfgs tau_bfgs. If bfgs_scale = 0.0, 844 it should reproduce the tau_dfp scaling. Same with dfp_scale. */ 845 tmp = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty; 846 beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk; 847 /* d <- -t*g + beta*d + tmp*yk */ 848 PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk)); 849 } else { 850 /* We have diagonal scaling enabled */ 851 PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work)); 852 PetscCall(MatSolve(cg->B, cg->yk, cg->y_work)); 853 /* compute scalar gamma */ 854 PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk)); 855 PetscCall(VecDot(cg->y_work, cg->yk, &tmp)); 856 gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp); 857 /* Compute scalar beta */ 858 beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk; 859 /* Compute stepdirection dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */ 860 PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work)); 861 } 862 break; 863 864 default: 865 break; 866 } 867 } 868 PetscFunctionReturn(PETSC_SUCCESS); 869 } 870 871 PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm) 872 { 873 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 874 TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING; 875 PetscReal step = 1.0, gnorm2, gd, dnorm = 0.0; 876 PetscReal gnorm2_old, f_old, resnorm, gnorm_old; 877 PetscBool pcgd_fallback = PETSC_FALSE; 878 879 PetscFunctionBegin; 880 /* We are now going to perform a line search along the direction. */ 881 /* Store solution and gradient info before it changes */ 882 PetscCall(VecCopy(tao->solution, cg->X_old)); 883 PetscCall(VecCopy(tao->gradient, cg->G_old)); 884 PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old)); 885 886 gnorm_old = gnorm; 887 gnorm2_old = gnorm_old * gnorm_old; 888 f_old = cg->f; 889 /* Perform bounded line search. If we are recycling a solution from a previous */ 890 /* TaoSolve, then we want to immediately skip to calculating a new direction rather than performing a linesearch */ 891 if (!(tao->recycle && 0 == tao->niter)) { 892 /* Above logic: the below code happens every iteration, except for the first iteration of a recycled TaoSolve */ 893 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 894 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 895 PetscCall(TaoAddLineSearchCounts(tao)); 896 897 /* Check linesearch failure */ 898 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 899 ++cg->ls_fails; 900 if (cg->cg_type == TAO_BNCG_GD) { 901 /* Nothing left to do but fail out of the optimization */ 902 step = 0.0; 903 tao->reason = TAO_DIVERGED_LS_FAILURE; 904 } else { 905 /* Restore previous point, perform preconditioned GD and regular GD steps at the last good point */ 906 PetscCall(VecCopy(cg->X_old, tao->solution)); 907 PetscCall(VecCopy(cg->G_old, tao->gradient)); 908 PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient)); 909 gnorm = gnorm_old; 910 gnorm2 = gnorm2_old; 911 cg->f = f_old; 912 913 /* Fall back on preconditioned CG (so long as you're not already using it) */ 914 if (cg->cg_type != TAO_BNCG_PCGD && cg->diag_scaling) { 915 pcgd_fallback = PETSC_TRUE; 916 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 917 918 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 919 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 920 921 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 922 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 923 PetscCall(TaoAddLineSearchCounts(tao)); 924 925 pcgd_fallback = PETSC_FALSE; 926 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 927 /* Going to perform a regular gradient descent step. */ 928 ++cg->ls_fails; 929 step = 0.0; 930 } 931 } 932 /* Fall back on the scaled gradient step */ 933 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 934 ++cg->ls_fails; 935 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 936 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 937 PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0)); 938 PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status)); 939 PetscCall(TaoAddLineSearchCounts(tao)); 940 } 941 942 if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) { 943 /* Nothing left to do but fail out of the optimization */ 944 ++cg->ls_fails; 945 step = 0.0; 946 tao->reason = TAO_DIVERGED_LS_FAILURE; 947 } else { 948 /* One of the fallbacks worked. Set them both back equal to false. */ 949 pcgd_fallback = PETSC_FALSE; 950 } 951 } 952 } 953 /* Convergence test for line search failure */ 954 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS); 955 956 /* Standard convergence test */ 957 PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W)); 958 PetscCall(VecNorm(cg->W, NORM_2, &resnorm)); 959 PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN"); 960 PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its)); 961 PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step)); 962 PetscUseTypeMethod(tao, convergencetest, tao->cnvP); 963 if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS); 964 } 965 /* Assert we have an updated step and we need at least one more iteration. */ 966 /* Calculate the next direction */ 967 /* Estimate the active set at the new solution */ 968 PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type)); 969 /* Compute the projected gradient and its norm */ 970 PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient)); 971 PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0)); 972 PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm)); 973 gnorm2 = gnorm * gnorm; 974 975 /* Calculate some quantities used in the StepDirectionUpdate. */ 976 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 977 /* Update the step direction. */ 978 PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback)); 979 ++tao->niter; 980 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 981 982 if (cg->cg_type != TAO_BNCG_GD) { 983 /* Figure out which previously active variables became inactive this iteration */ 984 PetscCall(ISDestroy(&cg->new_inactives)); 985 if (cg->inactive_idx && cg->inactive_old) PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives)); 986 /* Selectively reset the CG step those freshly inactive variables */ 987 if (cg->new_inactives) { 988 PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 989 PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 990 PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step)); 991 PetscCall(VecScale(cg->inactive_step, -1.0)); 992 PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step)); 993 PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad)); 994 } 995 /* Verify that this is a descent direction */ 996 PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd)); 997 PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm)); 998 if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) { 999 /* Not a descent direction, so we reset back to projected gradient descent */ 1000 PetscCall(TaoBNCGResetUpdate(tao, gnorm2)); 1001 PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection)); 1002 ++cg->descent_error; 1003 } else { 1004 } 1005 } 1006 PetscFunctionReturn(PETSC_SUCCESS); 1007 } 1008 1009 PETSC_INTERN PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0) 1010 { 1011 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 1012 PetscBool same; 1013 1014 PetscFunctionBegin; 1015 PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same)); 1016 if (same) { 1017 PetscCall(PetscObjectReference((PetscObject)H0)); 1018 cg->pc = H0; 1019 } 1020 PetscFunctionReturn(PETSC_SUCCESS); 1021 } 1022 1023 /*@ 1024 TaoBNCGGetType - Return the type for the `TAOBNCG` solver 1025 1026 Input Parameter: 1027 . tao - the `Tao` solver context 1028 1029 Output Parameter: 1030 . type - `TAOBNCG` type 1031 1032 Level: advanced 1033 1034 .seealso: `Tao`, `TAOBNCG`, `TaoBNCGSetType()`, `TaoBNCGType` 1035 @*/ 1036 PetscErrorCode TaoBNCGGetType(Tao tao, TaoBNCGType *type) 1037 { 1038 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 1039 PetscBool same; 1040 1041 PetscFunctionBegin; 1042 PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same)); 1043 PetscCheck(same, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_INCOMP, "TAO solver is not BNCG type"); 1044 *type = cg->cg_type; 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 /*@ 1049 TaoBNCGSetType - Set the type for the `TAOBNCG` solver 1050 1051 Input Parameters: 1052 + tao - the `Tao` solver context 1053 - type - `TAOBNCG` type 1054 1055 Level: advanced 1056 1057 .seealso: `Tao`, `TAOBNCG`, `TaoBNCGGetType()`, `TaoBNCGType` 1058 @*/ 1059 PetscErrorCode TaoBNCGSetType(Tao tao, TaoBNCGType type) 1060 { 1061 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 1062 PetscBool same; 1063 1064 PetscFunctionBegin; 1065 PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same)); 1066 if (same) cg->cg_type = type; 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069