#include <petsctaolinesearch.h>
#include <../src/tao/bound/impls/bncg/bncg.h> /*I "petsctao.h" I*/
#include <petscksp.h>

const char *const TaoBNCGTypes[] = {"gd", "pcgd", "hs", "fr", "prp", "prp_plus", "dy", "hz", "dk", "kd", "ssml_bfgs", "ssml_dfp", "ssml_brdn", "TAOBNCGType", "TAO_BNCG_", NULL};

#define CG_AS_NONE      0
#define CG_AS_BERTSEKAS 1
#define CG_AS_SIZE      2

static const char *CG_AS_TYPE[64] = {"none", "bertsekas"};

PetscErrorCode TaoBNCGEstimateActiveSet(Tao tao, PetscInt asType)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(ISDestroy(&cg->inactive_old));
  if (cg->inactive_idx) {
    PetscCall(ISDuplicate(cg->inactive_idx, &cg->inactive_old));
    PetscCall(ISCopy(cg->inactive_idx, cg->inactive_old));
  }
  switch (asType) {
  case CG_AS_NONE:
    PetscCall(ISDestroy(&cg->inactive_idx));
    PetscCall(VecWhichInactive(tao->XL, tao->solution, cg->unprojected_gradient, tao->XU, PETSC_TRUE, &cg->inactive_idx));
    PetscCall(ISDestroy(&cg->active_idx));
    PetscCall(ISComplementVec(cg->inactive_idx, tao->solution, &cg->active_idx));
    break;

  case CG_AS_BERTSEKAS:
    /* Use gradient descent to estimate the active set */
    PetscCall(VecCopy(cg->unprojected_gradient, cg->W));
    PetscCall(VecScale(cg->W, -1.0));
    PetscCall(TaoEstimateActiveBounds(tao->solution, tao->XL, tao->XU, cg->unprojected_gradient, cg->W, cg->work, cg->as_step, &cg->as_tol, &cg->active_lower, &cg->active_upper, &cg->active_fixed, &cg->active_idx, &cg->inactive_idx));
    break;

  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode TaoBNCGBoundStep(Tao tao, PetscInt asType, Vec step)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  switch (asType) {
  case CG_AS_NONE:
    if (cg->active_idx) PetscCall(VecISSet(step, cg->active_idx, 0.0));
    break;

  case CG_AS_BERTSEKAS:
    PetscCall(TaoBoundStep(tao->solution, tao->XL, tao->XU, cg->active_lower, cg->active_upper, cg->active_fixed, 1.0, step));
    break;

  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoSolve_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscReal step = 1.0, gnorm, gnorm2, resnorm;
  PetscInt  nDiff;

  PetscFunctionBegin;
  /* Compute the variable bounds and pass them to the line search */
  PetscCall(TaoComputeVariableBounds(tao));
  PetscCall(TaoLineSearchSetVariableBounds(tao->linesearch, tao->XL, tao->XU));

  /* Project the initial point onto the feasible region */
  PetscCall(TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution));

  if (nDiff > 0 || !tao->recycle) PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient));
  PetscCall(VecNorm(cg->unprojected_gradient, NORM_2, &gnorm));
  PetscCheck(!PetscIsInfOrNanReal(cg->f) && !PetscIsInfOrNanReal(gnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN");

  /* Estimate the active set and compute the projected gradient */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));

  /* Project the gradient and calculate the norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Initialize counters */
  tao->niter          = 0;
  cg->ls_fails        = cg->descent_error = 0;
  cg->resets          = -1;
  cg->skipped_updates = cg->pure_gd_steps = 0;
  cg->iter_quad       = 0;
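
  /* Note on the convergence measure used below: VecFischer() builds the Fischer-Burmeister
     residual for the bound-constrained KKT conditions. Roughly, it combines the distances to the
     bounds and the gradient componentwise through
       phi(a, b) = sqrt(a^2 + b^2) - a - b,
     which vanishes if and only if a >= 0, b >= 0, and a*b = 0, so the norm of the residual is
     zero exactly at a bound-constrained stationary point; see the VecFischer() man page. */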

  /* Convergence test at the starting point. */
  tao->reason = TAO_CONTINUE_ITERATING;

  PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
  PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
  PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
  PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
  PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
  PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);

  /* Calculate the initial direction. */
  if (!tao->recycle) {
    /* We are not recycling a solution/history from a past TaoSolve */
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  }
  /* Initial gradient descent step. Scaling by 1.0 also does a decent job for some problems. */
  while (1) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      PetscUseTypeMethod(tao, update, tao->niter, tao->user_update);
      PetscCall(TaoComputeObjectiveAndGradient(tao, tao->solution, &cg->f, cg->unprojected_gradient));
    }
    PetscCall(TaoBNCGConductIteration(tao, gnorm));
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  }
}

static PetscErrorCode TaoSetUp_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (!tao->gradient) PetscCall(VecDuplicate(tao->solution, &tao->gradient));
  if (!tao->stepdirection) PetscCall(VecDuplicate(tao->solution, &tao->stepdirection));
  if (!cg->W) PetscCall(VecDuplicate(tao->solution, &cg->W));
  if (!cg->work) PetscCall(VecDuplicate(tao->solution, &cg->work));
  if (!cg->sk) PetscCall(VecDuplicate(tao->solution, &cg->sk));
  if (!cg->yk) PetscCall(VecDuplicate(tao->gradient, &cg->yk));
  if (!cg->X_old) PetscCall(VecDuplicate(tao->solution, &cg->X_old));
  if (!cg->G_old) PetscCall(VecDuplicate(tao->gradient, &cg->G_old));
  if (cg->diag_scaling) {
    PetscCall(VecDuplicate(tao->solution, &cg->d_work));
    PetscCall(VecDuplicate(tao->solution, &cg->y_work));
    PetscCall(VecDuplicate(tao->solution, &cg->g_work));
  }
  if (!cg->unprojected_gradient) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient));
  if (!cg->unprojected_gradient_old) PetscCall(VecDuplicate(tao->gradient, &cg->unprojected_gradient_old));
  PetscCall(MatLMVMAllocate(cg->B, cg->sk, cg->yk));
  if (cg->pc) PetscCall(MatLMVMSetJ0(cg->B, cg->pc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoDestroy_BNCG(Tao tao)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  if (tao->setupcalled) {
    PetscCall(VecDestroy(&cg->W));
    PetscCall(VecDestroy(&cg->work));
    PetscCall(VecDestroy(&cg->X_old));
    PetscCall(VecDestroy(&cg->G_old));
    PetscCall(VecDestroy(&cg->unprojected_gradient));
    PetscCall(VecDestroy(&cg->unprojected_gradient_old));
    PetscCall(VecDestroy(&cg->g_work));
    PetscCall(VecDestroy(&cg->d_work));
    PetscCall(VecDestroy(&cg->y_work));
    PetscCall(VecDestroy(&cg->sk));
    PetscCall(VecDestroy(&cg->yk));
  }
  PetscCall(ISDestroy(&cg->active_lower));
  PetscCall(ISDestroy(&cg->active_upper));
  PetscCall(ISDestroy(&cg->active_fixed));
  PetscCall(ISDestroy(&cg->active_idx));
  PetscCall(ISDestroy(&cg->inactive_idx));
  PetscCall(ISDestroy(&cg->inactive_old));
  PetscCall(ISDestroy(&cg->new_inactives));
  PetscCall(MatDestroy(&cg->B));
  if (cg->pc) PetscCall(MatDestroy(&cg->pc));
  PetscCall(PetscFree(tao->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
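
/* The options handled below are registered under the solver's -tao_bncg_ prefix. As a usage
   sketch (assuming an application "app" that calls TaoSetFromOptions() on a TAOBNCG solver;
   the application name is illustrative only):

     ./app -tao_type bncg -tao_bncg_type ssml_bfgs -tao_bncg_diag_scaling 0 -tao_bncg_as_type bertsekas

   selects the self-scaling memoryless BFGS update with diagonal preconditioning disabled and
   Bertsekas-style active-set estimation. */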
static PetscErrorCode TaoSetFromOptions_BNCG(Tao tao, PetscOptionItems *PetscOptionsObject)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "Nonlinear Conjugate Gradient method for bound constrained optimization");
  PetscCall(PetscOptionsEnum("-tao_bncg_type", "CG update formula", "TaoBNCGTypes", TaoBNCGTypes, (PetscEnum)cg->cg_type, (PetscEnum *)&cg->cg_type, NULL));
  if (cg->cg_type != TAO_BNCG_SSML_BFGS) cg->alpha = -1.0; /* Default for the non-BFGS methods; the user can override it below. */
  if (TAO_BNCG_GD == cg->cg_type) {
    cg->cg_type = TAO_BNCG_PCGD;
    /* Set scaling equal to none or, at best, scalar scaling. */
    cg->unscaled_restart = PETSC_TRUE;
    cg->diag_scaling     = PETSC_FALSE;
  }
  PetscCall(PetscOptionsReal("-tao_bncg_hz_eta", "(developer) cutoff tolerance for HZ", "", cg->hz_eta, &cg->hz_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_eps", "(developer) cutoff value for restarts", "", cg->epsilon, &cg->epsilon, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dk_eta", "(developer) cutoff tolerance for DK", "", cg->dk_eta, &cg->dk_eta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_xi", "(developer) parameter in the KD method", "", cg->xi, &cg->xi, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_theta", "(developer) update parameter for the Broyden method", "", cg->theta, &cg->theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_hz_theta", "(developer) parameter for the HZ (2006) method", "", cg->hz_theta, &cg->hz_theta, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_alpha", "(developer) parameter for the scalar scaling", "", cg->alpha, &cg->alpha, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_bfgs_scale", "(developer) update parameter for the BFGS/Broyden CG methods", "", cg->bfgs_scale, &cg->bfgs_scale, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_dfp_scale", "(developer) update parameter for the DFP/Broyden CG methods", "", cg->dfp_scale, &cg->dfp_scale, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_diag_scaling", "Enable diagonal Broyden-like preconditioning", "", cg->diag_scaling, &cg->diag_scaling, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_dynamic_restart", "(developer) use dynamic restarts as in HZ, DK, KD", "", cg->use_dynamic_restart, &cg->use_dynamic_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_unscaled_restart", "(developer) use unscaled gradient restarts", "", cg->unscaled_restart, &cg->unscaled_restart, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_zeta", "(developer) free parameter for the Kou-Dai method", "", cg->zeta, &cg->zeta, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_quad", "(developer) number of iterations with approximate quadratic behavior needed for restart", "", cg->min_quad, &cg->min_quad, NULL));
  PetscCall(PetscOptionsInt("-tao_bncg_min_restart_num", "(developer) number of iterations between restarts (times dimension)", "", cg->min_restart_num, &cg->min_restart_num, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_spaced_restart", "(developer) enable regular steepest descent restarting every fixed number of iterations", "", cg->spaced_restart, &cg->spaced_restart, NULL));
  PetscCall(PetscOptionsBool("-tao_bncg_no_scaling", "Disable all scaling except in restarts", "", cg->no_scaling, &cg->no_scaling, NULL));
  if (cg->no_scaling) {
    cg->diag_scaling = PETSC_FALSE;
    cg->alpha        = -1.0;
  }
  if (cg->alpha == -1.0 && cg->cg_type == TAO_BNCG_KD && !cg->diag_scaling) { /* Some more default options that appear to be good. */
    cg->neg_xi = PETSC_TRUE;
  }
  PetscCall(PetscOptionsBool("-tao_bncg_neg_xi", "(developer) use negative xi when it might yield a smaller descent direction than necessary", "", cg->neg_xi, &cg->neg_xi, NULL));
  PetscCall(PetscOptionsEList("-tao_bncg_as_type", "active set estimation method", "", CG_AS_TYPE, CG_AS_SIZE, CG_AS_TYPE[cg->as_type], &cg->as_type, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_tol", "(developer) initial tolerance used when estimating actively bounded variables", "", cg->as_tol, &cg->as_tol, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_as_step", "(developer) step length used when estimating actively bounded variables", "", cg->as_step, &cg->as_step, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_min", "(developer) minimum scaling factor used for scaled gradient restarts", "", cg->delta_min, &cg->delta_min, NULL));
  PetscCall(PetscOptionsReal("-tao_bncg_delta_max", "(developer) maximum scaling factor used for scaled gradient restarts", "", cg->delta_max, &cg->delta_max, NULL));

  PetscOptionsHeadEnd();
  PetscCall(MatSetOptionsPrefix(cg->B, ((PetscObject)tao)->prefix));
  PetscCall(MatAppendOptionsPrefix(cg->B, "tao_bncg_"));
  PetscCall(MatSetFromOptions(cg->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode TaoView_BNCG(Tao tao, PetscViewer viewer)
{
  PetscBool isascii;
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) {
    PetscCall(PetscViewerASCIIPushTab(viewer));
    PetscCall(PetscViewerASCIIPrintf(viewer, "CG Type: %s\n", TaoBNCGTypes[cg->cg_type]));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Skipped Stepdirection Updates: %" PetscInt_FMT "\n", cg->skipped_updates));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Scaled gradient steps: %" PetscInt_FMT "\n", cg->resets));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Pure gradient steps: %" PetscInt_FMT "\n", cg->pure_gd_steps));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Not a descent direction: %" PetscInt_FMT "\n", cg->descent_error));
    PetscCall(PetscViewerASCIIPrintf(viewer, "Line search fails: %" PetscInt_FMT "\n", cg->ls_fails));
    if (cg->diag_scaling) {
      PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
      PetscCall(MatView(cg->B, viewer));
      PetscCall(PetscViewerPopFormat(viewer));
    }
    PetscCall(PetscViewerASCIIPopTab(viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
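
/* For reference: for generic alpha, the scalar scaling tau solves the quadratic

     alpha (y^T y) tau^2 - (2 alpha - 1) (y^T s) tau + (alpha - 1) (s^T s) = 0,

   which is what the coefficients a, b, c below assemble. The special cases bypass the quadratic:
   alpha = 1 gives tau = y^T s / y^T y and alpha = 0 gives tau = s^T s / y^T s (the two
   Barzilai-Borwein-type scalings), while alpha = -1 disables the scaling (tau = 1). */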
PetscErrorCode TaoBNCGComputeScalarScaling(PetscReal yty, PetscReal yts, PetscReal sts, PetscReal *scale, PetscReal alpha)
{
  PetscReal a, b, c, sig1, sig2;

  PetscFunctionBegin;
  *scale = 0.0;

  if (1.0 == alpha) *scale = yts / yty;
  else if (0.0 == alpha) *scale = sts / yts;
  else if (-1.0 == alpha) *scale = 1.0;
  else {
    a = yty;
    b = yts;
    c = sts;
    a *= alpha;
    b *= -(2.0 * alpha - 1.0);
    c *= alpha - 1.0;
    sig1 = (-b + PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    sig2 = (-b - PetscSqrtReal(b * b - 4.0 * a * c)) / (2.0 * a);
    /* accept the positive root as the scalar */
    if (sig1 > 0.0) *scale = sig1;
    else if (sig2 > 0.0) *scale = sig2;
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_CONV_FAILED, "Cannot find positive scalar");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  TAOBNCG - Bound-constrained Nonlinear Conjugate Gradient method.

  Options Database Keys:
+ -tao_bncg_recycle - enable recycling the latest calculated gradient vector in subsequent TaoSolve() calls (currently disabled)
. -tao_bncg_eta <r> - restart tolerance
. -tao_bncg_type <taocg_type> - cg formula
. -tao_bncg_as_type <none,bertsekas> - active set estimation method
. -tao_bncg_as_tol <r> - tolerance used in Bertsekas active-set estimation
. -tao_bncg_as_step <r> - trial step length used in Bertsekas active-set estimation
. -tao_bncg_eps <r> - cutoff used for determining whether or not we restart based on step length each iteration, as well as whether or not we continue using the last step direction; defaults to machine precision
. -tao_bncg_theta <r> - convex combination parameter for the Broyden method
. -tao_bncg_hz_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_dk_eta <r> - cutoff tolerance for the beta term in the HZ, DK methods
. -tao_bncg_xi <r> - multiplicative constant of the gamma term in the KD method
. -tao_bncg_hz_theta <r> - multiplicative constant of the theta term for the HZ method
. -tao_bncg_bfgs_scale <r> - scaling parameter of the BFGS contribution to the scalar Broyden method
. -tao_bncg_dfp_scale <r> - scaling parameter of the DFP contribution to the scalar Broyden method
. -tao_bncg_diag_scaling <b> - whether or not to use diagonal initialization/preconditioning for the CG methods; defaults to true
. -tao_bncg_dynamic_restart <b> - use the dynamic restart strategy in the HZ, DK, KD methods
. -tao_bncg_unscaled_restart <b> - whether or not to scale the gradient when doing gradient descent restarts
. -tao_bncg_zeta <r> - scaling parameter in the KD method
. -tao_bncg_delta_min <r> - minimum bound for rescaling during restarted gradient descent steps
. -tao_bncg_delta_max <r> - maximum bound for rescaling during restarted gradient descent steps
. -tao_bncg_min_quad <i> - number of quadratic-like steps in a row necessary to do a dynamic restart
. -tao_bncg_min_restart_num <i> - this number, x, ensures a gradient descent step every x*n iterations, where n is the dimension of the problem
. -tao_bncg_spaced_restart <b> - whether or not to do gradient descent steps every x*n iterations
. -tao_bncg_no_scaling <b> - if true, eliminates all scaling, including defaults
- -tao_bncg_neg_xi <b> - whether or not to use negative xi in the KD method under certain conditions

  Notes:
  CG formulas are:
+ "gd" - Gradient Descent
. "pcgd" - Preconditioned Gradient Descent
. "fr" - Fletcher-Reeves
. "prp" - Polak-Ribiere-Polyak
. "prp_plus" - Polak-Ribiere-Polyak plus (PRP+)
. "hs" - Hestenes-Stiefel
. "dy" - Dai-Yuan
. "ssml_bfgs" - Self-Scaling Memoryless BFGS
. "ssml_dfp" - Self-Scaling Memoryless DFP
. "ssml_brdn" - Self-Scaling Memoryless Broyden
. "hz" - Hager-Zhang (CG_DESCENT 5.3)
. "dk" - Dai-Kou (2013)
- "kd" - Kou-Dai (2015)
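
  Example Usage:
  A minimal calling sequence might look like the following sketch (error checking omitted;
  FormFunctionGradient() stands for a user-provided objective/gradient callback and is
  illustrative only).
.vb
  TaoCreate(PETSC_COMM_WORLD, &tao);
  TaoSetType(tao, TAOBNCG);
  TaoSetSolution(tao, x);
  TaoSetVariableBounds(tao, xl, xu);
  TaoSetObjectiveAndGradient(tao, NULL, FormFunctionGradient, &user);
  TaoSetFromOptions(tao);
  TaoSolve(tao);
.ve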
"dk" - Dai-Kou (2013) 326 - "kd" - Kou-Dai (2015) 327 328 Level: beginner 329 M*/ 330 331 PETSC_EXTERN PetscErrorCode TaoCreate_BNCG(Tao tao) 332 { 333 TAO_BNCG *cg; 334 const char *morethuente_type = TAOLINESEARCHMT; 335 336 PetscFunctionBegin; 337 tao->ops->setup = TaoSetUp_BNCG; 338 tao->ops->solve = TaoSolve_BNCG; 339 tao->ops->view = TaoView_BNCG; 340 tao->ops->setfromoptions = TaoSetFromOptions_BNCG; 341 tao->ops->destroy = TaoDestroy_BNCG; 342 343 /* Override default settings (unless already changed) */ 344 if (!tao->max_it_changed) tao->max_it = 2000; 345 if (!tao->max_funcs_changed) tao->max_funcs = 4000; 346 347 /* Note: nondefault values should be used for nonlinear conjugate gradient */ 348 /* method. In particular, gtol should be less that 0.5; the value used in */ 349 /* Nocedal and Wright is 0.10. We use the default values for the */ 350 /* linesearch because it seems to work better. */ 351 PetscCall(TaoLineSearchCreate(((PetscObject)tao)->comm, &tao->linesearch)); 352 PetscCall(PetscObjectIncrementTabLevel((PetscObject)tao->linesearch, (PetscObject)tao, 1)); 353 PetscCall(TaoLineSearchSetType(tao->linesearch, morethuente_type)); 354 PetscCall(TaoLineSearchUseTaoRoutines(tao->linesearch, tao)); 355 356 PetscCall(PetscNew(&cg)); 357 tao->data = (void *)cg; 358 PetscCall(KSPInitializePackage()); 359 PetscCall(MatCreate(PetscObjectComm((PetscObject)tao), &cg->B)); 360 PetscCall(PetscObjectIncrementTabLevel((PetscObject)cg->B, (PetscObject)tao, 1)); 361 PetscCall(MatSetType(cg->B, MATLMVMDIAGBROYDEN)); 362 363 cg->pc = NULL; 364 365 cg->dk_eta = 0.5; 366 cg->hz_eta = 0.4; 367 cg->dynamic_restart = PETSC_FALSE; 368 cg->unscaled_restart = PETSC_FALSE; 369 cg->no_scaling = PETSC_FALSE; 370 cg->delta_min = 1e-7; 371 cg->delta_max = 100; 372 cg->theta = 1.0; 373 cg->hz_theta = 1.0; 374 cg->dfp_scale = 1.0; 375 cg->bfgs_scale = 1.0; 376 cg->zeta = 0.1; 377 cg->min_quad = 6; 378 cg->min_restart_num = 6; /* As in CG_DESCENT and KD2015*/ 379 cg->xi = 1.0; 380 cg->neg_xi = PETSC_TRUE; 381 cg->spaced_restart = PETSC_FALSE; 382 cg->tol_quad = 1e-8; 383 cg->as_step = 0.001; 384 cg->as_tol = 0.001; 385 cg->eps_23 = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0 / 3.0); /* Just a little tighter*/ 386 cg->as_type = CG_AS_BERTSEKAS; 387 cg->cg_type = TAO_BNCG_SSML_BFGS; 388 cg->alpha = 1.0; 389 cg->diag_scaling = PETSC_TRUE; 390 PetscFunctionReturn(PETSC_SUCCESS); 391 } 392 393 PetscErrorCode TaoBNCGResetUpdate(Tao tao, PetscReal gnormsq) 394 { 395 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 396 PetscReal scaling; 397 398 PetscFunctionBegin; 399 ++cg->resets; 400 scaling = 2.0 * PetscMax(1.0, PetscAbsScalar(cg->f)) / PetscMax(gnormsq, cg->eps_23); 401 scaling = PetscMin(cg->delta_max, PetscMax(cg->delta_min, scaling)); 402 if (cg->unscaled_restart) { 403 scaling = 1.0; 404 ++cg->pure_gd_steps; 405 } 406 PetscCall(VecAXPBY(tao->stepdirection, -scaling, 0.0, tao->gradient)); 407 /* Also want to reset our diagonal scaling with each restart */ 408 if (cg->diag_scaling) PetscCall(MatLMVMReset(cg->B, PETSC_FALSE)); 409 PetscFunctionReturn(PETSC_SUCCESS); 410 } 411 412 PetscErrorCode TaoBNCGCheckDynamicRestart(Tao tao, PetscReal stepsize, PetscReal gd, PetscReal gd_old, PetscBool *dynrestart, PetscReal fold) 413 { 414 TAO_BNCG *cg = (TAO_BNCG *)tao->data; 415 PetscReal quadinterp; 416 417 PetscFunctionBegin; 418 if (cg->f < cg->min_quad / 10) { 419 *dynrestart = PETSC_FALSE; 420 PetscFunctionReturn(PETSC_SUCCESS); 421 } /* just skip this since this strategy doesn't work well for functions near zero */ 422 
  quadinterp = 2.0 * (cg->f - fold) / (stepsize * (gd + gd_old));
  if (PetscAbs(quadinterp - 1.0) < cg->tol_quad) ++cg->iter_quad;
  else {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_FALSE;
  }
  if (cg->iter_quad >= cg->min_quad) {
    cg->iter_quad = 0;
    *dynrestart   = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGStepDirectionUpdate(Tao tao, PetscReal gnorm2, PetscReal step, PetscReal fold, PetscReal gnorm2_old, PetscReal dnorm, PetscBool pcgd_fallback)
{
  TAO_BNCG *cg    = (TAO_BNCG *)tao->data;
  PetscReal gamma = 1.0, tau_k, beta;
  PetscReal tmp = 1.0, ynorm, ynorm2 = 1.0, snorm = 1.0, dk_yk = 1.0, gd;
  PetscReal gkp1_yk, gd_old, tau_bfgs, tau_dfp, gkp1D_yk, gtDg;
  PetscInt  dim;
  PetscBool cg_restart = PETSC_FALSE;

  PetscFunctionBegin;
  /* Local curvature check to see if we need to restart */
  if (tao->niter >= 1 || tao->recycle) {
    PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
    PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
    ynorm2 = ynorm * ynorm;
    PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
    if (step * dnorm < PETSC_MACHINE_EPSILON || step * dk_yk < PETSC_MACHINE_EPSILON) {
      cg_restart = PETSC_TRUE;
      ++cg->skipped_updates;
    }
    /* If the user wants regularly spaced restarts, do a gradient descent step every
       min_restart_num * n iterations, where n is the problem dimension */
    if (cg->spaced_restart) {
      PetscCall(VecGetSize(tao->gradient, &dim));
      if (0 == tao->niter % (dim * cg->min_restart_num)) cg_restart = PETSC_TRUE;
    }
  }
  /* Compute the diagonal scaling vector if applicable */
  if (cg->diag_scaling) PetscCall(MatLMVMUpdate(cg->B, tao->solution, tao->gradient));

  /* A note on diagonal scaling (to be added to paper):
     For the FR, PR, PRP, and DY methods, the diagonally scaled versions
     must be derived as a preconditioned CG method rather than as
     a Hessian initialization like in the Broyden methods.

     In that case, one writes the objective function as
     f(x) \equiv f(Ay). Gradient evaluations yield g(x_k) = A g(Ay_k) = A g(x_k).
     Furthermore, the direction d_k \equiv (x_k - x_{k-1})/step according to
     HZ (2006) becomes A^{-1} d_k, such that d_k^T g_k remains the
     same under preconditioning. Note that A is diagonal, such that A^T = A.

     This yields questions like what the dot product d_k^T y_k
     should look like. HZ mistakenly treats that as the same under
     preconditioning, but that is not necessarily true.

     Observe y_k \equiv g_k - g_{k-1}, and under the P.C. transformation,
     we get d_k^T y_k = (d_k^T A_k^{-T} A_k g_k - d_k^T A_k^{-T} A_{k-1} g_{k-1}),
     yielding d_k^T y_k = d_k^T g_k - d_k^T (A_k^{-T} A_{k-1} g_{k-1}), which is
     NOT the same if our preconditioning matrix is updated between iterations.
     This same issue is found when considering dot products of the form g_{k+1}^T y_k. */
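
  /* Each case below assembles the step from the common template

       d_{k+1} = -tau_k g_{k+1} + beta d_k (+ gamma y_k for the quasi-Newton-like members),

     where tau_k is a scalar scaling (or is implicit in the diagonal preconditioner) and the
     methods differ only in their choices of beta (and gamma). The VecAXPBY()/VecAXPBYPCZ()
     calls implement exactly this template. */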
  /* Compute the CG step direction */
  if (cg_restart) {
    PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
  } else if (pcgd_fallback) {
    /* Just like preconditioned CG */
    PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
    PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
  } else if (ynorm2 > PETSC_MACHINE_EPSILON) {
    switch (cg->cg_type) {
    case TAO_BNCG_PCGD:
      if (!cg->diag_scaling) {
        if (!cg->no_scaling) {
          cg->sts = step * step * dnorm * dnorm;
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        } else {
          tau_k = 1.0;
          ++cg->pure_gd_steps;
        }
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, 0.0, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, 0.0, cg->g_work));
      }
      break;

    case TAO_BNCG_HS:
      /* Classic Hestenes-Stiefel method, modified with scalar and diagonal preconditioning. */
      if (!cg->diag_scaling) {
        cg->sts = step * step * dnorm * dnorm;
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->sts, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        beta = gkp1_yk / dk_yk;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_FR:
      PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      PetscCall(VecDot(cg->yk, tao->stepdirection, &dk_yk));
      if (!cg->diag_scaling) {
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, step * step * dnorm * dnorm, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Before it's updated */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(tao->gradient, cg->g_work, &tmp));
        beta = tmp / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_PRP:
      snorm = step * dnorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;
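
    /* PRP+ below is PRP with beta truncated at zero, beta = max(beta_PRP, 0); the truncation
       acts as an automatic restart when successive gradients are far from conjugate and is the
       standard globalization fix for plain PRP (Gilbert and Nocedal, 1992). */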
    case TAO_BNCG_PRP_PLUS:
      PetscCall(VecWAXPY(cg->yk, -1.0, cg->G_old, tao->gradient));
      PetscCall(VecNorm(cg->yk, NORM_2, &ynorm));
      ynorm2 = ynorm * ynorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->G_old, cg->G_old, &gnorm2_old));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, snorm * snorm, &tau_k, cg->alpha));
        beta = tau_k * gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(VecDot(cg->G_old, cg->g_work, &gnorm2_old)); /* Old gtDg */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        beta = gkp1_yk / gnorm2_old;
        beta = PetscMax(beta, 0.0);
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_DY:
      /* Dai, Yu-Hong, and Yaxiang Yuan. "A nonlinear conjugate gradient method with a strong global convergence property."
         SIAM Journal on Optimization 10, no. 1 (1999): 177-182. */
      if (!cg->diag_scaling) {
        PetscCall(VecDot(tao->stepdirection, tao->gradient, &gd));
        PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, step * dk_yk, cg->yts, &tau_k, cg->alpha));
        beta = tau_k * gnorm2 / (gd - gd_old);
        PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
      } else {
        PetscCall(MatMult(cg->B, tao->stepdirection, cg->d_work));
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(VecDot(cg->g_work, tao->gradient, &gtDg));
        PetscCall(VecDot(tao->stepdirection, cg->G_old, &gd_old));
        PetscCall(VecDot(cg->d_work, cg->g_work, &dk_yk));
        dk_yk = dk_yk - gd_old;
        beta  = gtDg / dk_yk;
        PetscCall(VecScale(cg->d_work, beta));
        PetscCall(VecWAXPY(tao->stepdirection, -1.0, cg->g_work, cg->d_work));
      }
      break;

    case TAO_BNCG_HZ:
      /* Hager, William W., and Hongchao Zhang. "Algorithm 851: CG_DESCENT, a conjugate gradient method with guaranteed descent."
         ACM Transactions on Mathematical Software (TOMS) 32, no. 1 (2006): 113-137. */
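      /* In the unpreconditioned branch below, beta is the HZ choice

           beta = tau_k ((g_{k+1}^T y_k) / (d_k^T y_k) - ||y_k||^2 (g_{k+1}^T d_k) / (d_k^T y_k)^2),

         truncated from below (the PetscMax calls) as in CG_DESCENT 5.3, with the extra DK 2013
         comparison, so the direction cannot degrade arbitrarily when d_k^T y_k is small. */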
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = dnorm * step;
      cg->yts = step * dk_yk;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          /* Supplying cg->alpha = -1.0 gives the CG_DESCENT 5.3 special case of tau_k = 1.0 */
          tmp  = gd / dk_yk;
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk));
          /* Bound beta as in CG_DESCENT 5.3, as implemented, with the third comparison from DK 2013 */
          beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
          /* d <- -tau*g + beta*d */
          PetscCall(VecAXPBY(tao->stepdirection, -tau_k, beta, tao->gradient));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          /* Apply the diagonal scaling to all our vectors */
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
          /* Construct the constant for scaling Dkyk in the update */
          tmp = gd / dk_yk;
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = -tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the dk contribution */
          beta = gkp1_yk / dk_yk + cg->hz_theta * tau_k; /* HZ; (1.15) from DK 2013 */
          /* From DK 2013, modified to account for diagonal scaling */
          PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
          beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
          /* Do the update */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
        }
      }
      break;

    case TAO_BNCG_DK:
      /* Dai, Yu-Hong, and Cai-Xia Kou. "A nonlinear conjugate gradient algorithm with an optimal property and an improved Wolfe line search."
         SIAM Journal on Optimization 23, no. 1 (2013): 296-320. */
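      /* The DK beta below augments the HZ choice with additional d_k and y_k terms and is again
         truncated from below by the hz_eta/dk_eta thresholds; in the Dai-Kou analysis this
         truncation is what is intended to keep the update uniformly a descent direction. */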
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
        /* Use cg->alpha = -1.0 to get tau_k = 1.0 as in CG_DESCENT 5.3 */
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk) + gd / (dnorm * dnorm)) - step * gd / dk_yk;
        beta = PetscMax(PetscMax(beta, cg->hz_eta * tau_k * gd_old / (dnorm * dnorm)), cg->dk_eta * tau_k * gd / (dnorm * dnorm));
        /* d <- -tau*g + beta*d */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, 0.0, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        cg->yty = ynorm2;
        cg->sts = snorm * snorm;
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        PetscCall(MatSolve(cg->B, tao->stepdirection, cg->d_work));
        /* Construct the constant ytDgkp1 */
        PetscCall(VecDot(cg->yk, cg->g_work, &gkp1_yk));
        PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
        tau_k = tau_k * gd / (dk_yk * dk_yk);
        tmp   = gd / dk_yk;
        /* beta is the constant which adds the dk contribution */
        beta = gkp1_yk / dk_yk - step * tmp - tau_k;
        /* Update this for the last term in beta */
        PetscCall(VecDot(cg->y_work, tao->stepdirection, &dk_yk));
        beta += tmp * dk_yk / (dnorm * dnorm); /* projection of y_work onto dk */
        PetscCall(VecDot(tao->stepdirection, cg->g_work, &gd));
        PetscCall(VecDot(cg->G_old, cg->d_work, &gd_old));
        beta = PetscMax(PetscMax(beta, cg->hz_eta * gd_old / (dnorm * dnorm)), cg->dk_eta * gd / (dnorm * dnorm));
        /* Do the update */
        PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
      }
      break;

    case TAO_BNCG_KD:
      /* Kou, Cai-Xia, and Yu-Hong Dai. "A modified self-scaling memoryless Broyden-Fletcher-Goldfarb-Shanno method for unconstrained optimization."
         Journal of Optimization Theory and Applications 165, no. 1 (2015): 209-224. */
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecDot(cg->G_old, tao->stepdirection, &gd_old));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      if (cg->use_dynamic_restart) PetscCall(TaoBNCGCheckDynamicRestart(tao, step, gd, gd_old, &cg->dynamic_restart, fold));
      if (cg->dynamic_restart) {
        PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      } else {
        if (!cg->diag_scaling) {
          PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
          PetscCall(TaoBNCGComputeScalarScaling(ynorm2, cg->yts, snorm * snorm, &tau_k, cg->alpha));
          beta = tau_k * (gkp1_yk / dk_yk - ynorm2 * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
          if (beta < cg->zeta * tau_k * gd / (dnorm * dnorm)) { /* 0.1 is KD's default zeta parameter */
            beta  = cg->zeta * tau_k * gd / (dnorm * dnorm);
            gamma = 0.0;
          } else {
            /* Using negative xi here seems to be very effective when there is no tau_k scaling.
               It guarantees a large descent step every iteration, going through DK 2015 Lemma 3.1's proof but allowing for negative xi. */
            if (gkp1_yk < 0 && cg->neg_xi) gamma = -1.0 * gd / dk_yk;
            else gamma = cg->xi * gd / dk_yk;
          }
          /* d <- -tau*g + gamma*tau*yk + beta*d */
          PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, gamma * tau_k, beta, tao->gradient, cg->yk));
        } else {
          /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
          cg->yty = ynorm2;
          cg->sts = snorm * snorm;
          PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
          PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
          /* Construct the constant ytDgkp1 */
          PetscCall(VecDot(cg->yk, cg->g_work, &gkp1D_yk));
          /* Construct the constant for scaling Dkyk in the update */
          gamma = gd / dk_yk;
          /* tau_k = -ytDy/(ytd)^2 * gd */
          PetscCall(VecDot(cg->yk, cg->y_work, &tau_k));
          tau_k = tau_k * gd / (dk_yk * dk_yk);
          /* beta is the constant which adds the d_k contribution */
          beta = gkp1D_yk / dk_yk - step * gamma - tau_k;
          /* Here is the requisite check */
          PetscCall(VecDot(tao->stepdirection, cg->g_work, &tmp));
          if (cg->neg_xi) {
            /* modified KD implementation */
            if (gkp1D_yk / dk_yk < 0) gamma = -1.0 * gd / dk_yk;
            else gamma = cg->xi * gd / dk_yk;
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            }
          } else { /* original KD 2015 implementation */
            if (beta < cg->zeta * tmp / (dnorm * dnorm)) {
              beta  = cg->zeta * tmp / (dnorm * dnorm);
              gamma = 0.0;
            } else gamma = cg->xi * gd / dk_yk;
          }
          /* Do the update in two steps */
          PetscCall(VecAXPBY(tao->stepdirection, -1.0, beta, cg->g_work));
          PetscCall(VecAXPY(tao->stepdirection, gamma, cg->y_work));
        }
      }
      break;

    case TAO_BNCG_SSML_BFGS:
      /* Perry, J. M. "A class of conjugate gradient algorithms with a two-step variable-metric memory."
         Discussion Papers 269 (1977). */
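      /* As implemented in the unpreconditioned branch below, this is the Perry/Shanno self-scaling
         memoryless BFGS direction,

           d_{k+1} = -tau_k g_{k+1} + beta d_k + tau_k (g_{k+1}^T d_k / d_k^T y_k) y_k,

         i.e. (a sketch of the equivalence) the product of a scaled memoryless BFGS inverse-Hessian
         approximation with the new gradient, assembled from dot products and AXPY updates only. */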
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        tmp  = gd / dk_yk;
        beta = tau_k * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * tmp;
        /* d <- -tau*g + tau*tmp*yk + beta*d */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp * tau_k, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless BFGS step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* Compute the scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = gd / dk_yk;
        /* Compute the scalar beta */
        beta = (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute the step direction d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case TAO_BNCG_SSML_DFP:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = dk_yk * step;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, cg->yts, cg->sts, &tau_k, cg->alpha));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->dfp_scale * tau_k;
        tmp   = tau_k * gkp1_yk / cg->yty;
        beta  = -step * gd / dk_yk;
        /* d <- -tau*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled and are taking a diagonally-scaled memoryless DFP step */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* Compute the scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = gkp1_yk / tmp;
        /* Compute the scalar beta */
        beta = -step * gd / dk_yk;
        /* Compute the step direction d_kp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    case TAO_BNCG_SSML_BRDN:
      PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
      PetscCall(VecWAXPY(cg->sk, -1.0, cg->X_old, tao->solution));
      snorm   = step * dnorm;
      cg->yts = step * dk_yk;
      cg->yty = ynorm2;
      cg->sts = snorm * snorm;
      if (!cg->diag_scaling) {
        /* Instead of a regular convex combination, we will solve a quadratic formula. */
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_bfgs, cg->bfgs_scale));
        PetscCall(TaoBNCGComputeScalarScaling(cg->yty, step * dk_yk, snorm * snorm, &tau_dfp, cg->dfp_scale));
        PetscCall(VecDot(cg->yk, tao->gradient, &gkp1_yk));
        tau_k = cg->theta * tau_bfgs + (1.0 - cg->theta) * tau_dfp;
        /* If bfgs_scale = 1.0, this reproduces the BFGS tau_bfgs; if bfgs_scale = 0.0,
           it reproduces the tau_dfp scaling. Same with dfp_scale. */
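        /* theta interpolates the Broyden family: theta = 1 reproduces the SSML-BFGS direction
           above and theta = 0 the SSML-DFP direction; tmp and beta below blend the two
           contributions accordingly. */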
        tmp  = cg->theta * tau_bfgs * gd / dk_yk + (1 - cg->theta) * tau_dfp * gkp1_yk / cg->yty;
        beta = cg->theta * tau_bfgs * (gkp1_yk / dk_yk - cg->yty * gd / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* d <- -tau*g + beta*d + tmp*yk */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -tau_k, tmp, beta, tao->gradient, cg->yk));
      } else {
        /* We have diagonal scaling enabled */
        PetscCall(MatSolve(cg->B, tao->gradient, cg->g_work));
        PetscCall(MatSolve(cg->B, cg->yk, cg->y_work));
        /* Compute the scalar gamma */
        PetscCall(VecDot(cg->g_work, cg->yk, &gkp1_yk));
        PetscCall(VecDot(cg->y_work, cg->yk, &tmp));
        gamma = cg->theta * gd / dk_yk + (1 - cg->theta) * (gkp1_yk / tmp);
        /* Compute the scalar beta */
        beta = cg->theta * (gkp1_yk / dk_yk - gd * tmp / (dk_yk * dk_yk)) - step * gd / dk_yk;
        /* Compute the step direction dkp1 = gamma*Dkyk + beta*dk - Dkgkp1 */
        PetscCall(VecAXPBYPCZ(tao->stepdirection, -1.0, gamma, beta, cg->g_work, cg->y_work));
      }
      break;

    default:
      break;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGConductIteration(Tao tao, PetscReal gnorm)
{
  TAO_BNCG                    *cg        = (TAO_BNCG *)tao->data;
  TaoLineSearchConvergedReason ls_status = TAOLINESEARCH_CONTINUE_ITERATING;
  PetscReal                    step = 1.0, gnorm2, gd, dnorm = 0.0;
  PetscReal                    gnorm2_old, f_old, resnorm, gnorm_old;
  PetscBool                    pcgd_fallback = PETSC_FALSE;

  PetscFunctionBegin;
  /* We are now going to perform a line search along the direction. */
  /* Store the solution and gradient info before it changes */
  PetscCall(VecCopy(tao->solution, cg->X_old));
  PetscCall(VecCopy(tao->gradient, cg->G_old));
  PetscCall(VecCopy(cg->unprojected_gradient, cg->unprojected_gradient_old));

  gnorm_old  = gnorm;
  gnorm2_old = gnorm_old * gnorm_old;
  f_old      = cg->f;
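
  /* If the line search below fails, recovery is staged: first retry from the last good point with
     a preconditioned gradient direction (when diagonal scaling is active and the type is not
     already PCGD), then with a scaled gradient restart, and only then give up and return
     TAO_DIVERGED_LS_FAILURE. */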
  /* Perform a bounded line search. If we are recycling a solution from a previous TaoSolve,
     then we want to immediately skip to calculating a new direction rather than performing a line search. */
  if (!(tao->recycle && 0 == tao->niter)) {
    /* The code below is executed every iteration, except for the first iteration of a recycled TaoSolve */
    PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
    PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
    PetscCall(TaoAddLineSearchCounts(tao));

    /* Check for line search failure */
    if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
      ++cg->ls_fails;
      if (cg->cg_type == TAO_BNCG_GD) {
        /* Nothing left to do but fail out of the optimization */
        step        = 0.0;
        tao->reason = TAO_DIVERGED_LS_FAILURE;
      } else {
        /* Restore the previous point and perform preconditioned GD and regular GD steps at the last good point */
        PetscCall(VecCopy(cg->X_old, tao->solution));
        PetscCall(VecCopy(cg->G_old, tao->gradient));
        PetscCall(VecCopy(cg->unprojected_gradient_old, cg->unprojected_gradient));
        gnorm  = gnorm_old;
        gnorm2 = gnorm2_old;
        cg->f  = f_old;

        /* Fall back on preconditioned CG (as long as we are not already using it) */
        if (cg->cg_type != TAO_BNCG_PCGD && cg->diag_scaling) {
          pcgd_fallback = PETSC_TRUE;
          PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));

          PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));

          pcgd_fallback = PETSC_FALSE;
          if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
            /* Going to perform a regular gradient descent step. */
            ++cg->ls_fails;
            step = 0.0;
          }
        }
        /* Fall back on the scaled gradient step */
        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          ++cg->ls_fails;
          PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
          PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
          PetscCall(TaoLineSearchSetInitialStepLength(tao->linesearch, 1.0));
          PetscCall(TaoLineSearchApply(tao->linesearch, tao->solution, &cg->f, cg->unprojected_gradient, tao->stepdirection, &step, &ls_status));
          PetscCall(TaoAddLineSearchCounts(tao));
        }

        if (ls_status != TAOLINESEARCH_SUCCESS && ls_status != TAOLINESEARCH_SUCCESS_USER) {
          /* Nothing left to do but fail out of the optimization */
          ++cg->ls_fails;
          step        = 0.0;
          tao->reason = TAO_DIVERGED_LS_FAILURE;
        } else {
          /* One of the fallbacks worked; clear the fallback flag. */
          pcgd_fallback = PETSC_FALSE;
        }
      }
    }
    /* Convergence test for line search failure */
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);

    /* Standard convergence test */
    PetscCall(VecFischer(tao->solution, cg->unprojected_gradient, tao->XL, tao->XU, cg->W));
    PetscCall(VecNorm(cg->W, NORM_2, &resnorm));
    PetscCheck(!PetscIsInfOrNanReal(resnorm), PetscObjectComm((PetscObject)tao), PETSC_ERR_USER, "User provided compute function generated Inf or NaN");
    PetscCall(TaoLogConvergenceHistory(tao, cg->f, resnorm, 0.0, tao->ksp_its));
    PetscCall(TaoMonitor(tao, tao->niter, cg->f, resnorm, 0.0, step));
    PetscUseTypeMethod(tao, convergencetest, tao->cnvP);
    if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* At this point we have an updated step and need at least one more iteration. */
  /* Calculate the next direction */
  /* Estimate the active set at the new solution */
  PetscCall(TaoBNCGEstimateActiveSet(tao, cg->as_type));
  /* Compute the projected gradient and its norm */
  PetscCall(VecCopy(cg->unprojected_gradient, tao->gradient));
  if (cg->active_idx) PetscCall(VecISSet(tao->gradient, cg->active_idx, 0.0));
  PetscCall(VecNorm(tao->gradient, NORM_2, &gnorm));
  gnorm2 = gnorm * gnorm;

  /* Calculate some quantities used in the StepDirectionUpdate. */
  PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
  /* Update the step direction. */
  PetscCall(TaoBNCGStepDirectionUpdate(tao, gnorm2, step, f_old, gnorm2_old, dnorm, pcgd_fallback));
  ++tao->niter;
  PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));

  if (cg->cg_type != TAO_BNCG_GD) {
    /* Figure out which previously active variables became inactive this iteration */
    PetscCall(ISDestroy(&cg->new_inactives));
    if (cg->inactive_idx && cg->inactive_old) PetscCall(ISDifference(cg->inactive_idx, cg->inactive_old, &cg->new_inactives));
    /* Selectively reset the CG step for those freshly inactive variables */
    if (cg->new_inactives) {
      PetscCall(VecGetSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecGetSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
      PetscCall(VecCopy(cg->inactive_grad, cg->inactive_step));
      PetscCall(VecScale(cg->inactive_step, -1.0));
      PetscCall(VecRestoreSubVector(tao->stepdirection, cg->new_inactives, &cg->inactive_step));
      PetscCall(VecRestoreSubVector(cg->unprojected_gradient, cg->new_inactives, &cg->inactive_grad));
    }
    /* Verify that this is a descent direction */
    PetscCall(VecDot(tao->gradient, tao->stepdirection, &gd));
    PetscCall(VecNorm(tao->stepdirection, NORM_2, &dnorm));
    if (PetscIsInfOrNanReal(gd) || (gd / (dnorm * dnorm) <= -1e10 || gd / (dnorm * dnorm) >= -1e-10)) {
      /* Not a descent direction, so we reset back to projected gradient descent */
      PetscCall(TaoBNCGResetUpdate(tao, gnorm2));
      PetscCall(TaoBNCGBoundStep(tao, cg->as_type, tao->stepdirection));
      ++cg->descent_error;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode TaoBNCGSetH0(Tao tao, Mat H0)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  if (same) {
    PetscCall(PetscObjectReference((PetscObject)H0));
    cg->pc = H0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  TaoBNCGGetType - Return the type for the `TAOBNCG` solver

  Input Parameter:
. tao - the `Tao` solver context

  Output Parameter:
. type - `TAOBNCG` type

  Level: advanced

.seealso: `Tao`, `TAOBNCG`, `TaoBNCGSetType()`, `TaoBNCGType`
@*/
PetscErrorCode TaoBNCGGetType(Tao tao, TaoBNCGType *type)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  PetscCheck(same, PetscObjectComm((PetscObject)tao), PETSC_ERR_ARG_INCOMP, "TAO solver is not BNCG type");
  *type = cg->cg_type;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  TaoBNCGSetType - Set the type for the `TAOBNCG` solver

  Input Parameters:
+ tao - the `Tao` solver context
- type - `TAOBNCG` type

  Level: advanced

.seealso: `Tao`, `TAOBNCG`, `TaoBNCGGetType()`, `TaoBNCGType`
@*/
PetscErrorCode TaoBNCGSetType(Tao tao, TaoBNCGType type)
{
  TAO_BNCG *cg = (TAO_BNCG *)tao->data;
  PetscBool same;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)tao, TAOBNCG, &same));
  if (same) cg->cg_type = type;
  PetscFunctionReturn(PETSC_SUCCESS);
}